mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-18 00:45:52 +00:00
Build sklearn transformer
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,7 +4,7 @@ __pycache__/
|
|||||||
*$py.class
|
*$py.class
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
*.so
|
build/**/*.so
|
||||||
|
|
||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
.Python
|
.Python
|
||||||
|
@@ -1 +1 @@
|
|||||||
include fimdlp/FImdlp.h
|
include fimdlp/CPPFImdlp.h
|
||||||
|
6
Makefile
6
Makefile
@@ -16,9 +16,13 @@ push: ## Push code with tags
|
|||||||
build: ## Build package
|
build: ## Build package
|
||||||
rm -fr dist/*
|
rm -fr dist/*
|
||||||
rm -fr build/*
|
rm -fr build/*
|
||||||
#python setup.py build_ext
|
|
||||||
python -m build
|
python -m build
|
||||||
|
|
||||||
|
buildext: ## Build extension
|
||||||
|
rm -fr dist/*
|
||||||
|
rm -fr build/*
|
||||||
|
python setup.py build_ext
|
||||||
|
|
||||||
audit: ## Audit pip
|
audit: ## Audit pip
|
||||||
pip-audit
|
pip-audit
|
||||||
|
|
||||||
|
@@ -1,13 +1,13 @@
|
|||||||
#include "FImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
namespace FImdlp
|
namespace CPPFImdlp
|
||||||
{
|
{
|
||||||
FImdlp::FImdlp()
|
CPPFImdlp::CPPFImdlp()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
FImdlp::~FImdlp()
|
CPPFImdlp::~CPPFImdlp()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
std::vector<float> FImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
|
std::vector<float> CPPFImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
|
||||||
{
|
{
|
||||||
std::vector<float> cutPts;
|
std::vector<float> cutPts;
|
||||||
int i, ant = X.at(0);
|
int i, ant = X.at(0);
|
@@ -1,14 +1,14 @@
|
|||||||
#ifndef FIMDLP_H
|
#ifndef CPPFIMDLP_H
|
||||||
#define FIMDLP_H
|
#define CPPFIMDLP_H
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
namespace FImdlp
|
namespace CPPFImdlp
|
||||||
{
|
{
|
||||||
class FImdlp
|
class CPPFImdlp
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
FImdlp();
|
CPPFImdlp();
|
||||||
~FImdlp();
|
~CPPFImdlp();
|
||||||
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
|
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
|
||||||
};
|
};
|
||||||
}
|
}
|
@@ -1 +1,3 @@
|
|||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
|
all = ["FImdlp", "__version__"]
|
||||||
|
@@ -2,15 +2,15 @@
|
|||||||
# cython: language_level = 3
|
# cython: language_level = 3
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
|
||||||
cdef extern from "FImdlp.h" namespace "FImdlp":
|
cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
|
||||||
cdef cppclass FImdlp:
|
cdef cppclass CPPFImdlp:
|
||||||
FImdlp() except +
|
CPPFImdlp() except +
|
||||||
vector[float] cutPoints(vector[int]&, vector[int]&)
|
vector[float] cutPoints(vector[int]&, vector[int]&)
|
||||||
|
|
||||||
cdef class CFImdlp:
|
cdef class CFImdlp:
|
||||||
cdef FImdlp *thisptr
|
cdef CPPFImdlp *thisptr
|
||||||
def __cinit__(self):
|
def __cinit__(self):
|
||||||
self.thisptr = new FImdlp()
|
self.thisptr = new CPPFImdlp()
|
||||||
def __dealloc__(self):
|
def __dealloc__(self):
|
||||||
del self.thisptr
|
del self.thisptr
|
||||||
def cut_points(self, X, y):
|
def cut_points(self, X, y):
|
||||||
|
BIN
fimdlp/cppfimdlp.cpython-310-darwin.so
Executable file
BIN
fimdlp/cppfimdlp.cpython-310-darwin.so
Executable file
Binary file not shown.
103
fimdlp/mdlp.py
Normal file
103
fimdlp/mdlp.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
import numpy as np
|
||||||
|
from .cppfimdlp import CFImdlp
|
||||||
|
from sklearn.base import BaseEstimator, TransformerMixin
|
||||||
|
from sklearn.utils.multiclass import unique_labels
|
||||||
|
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
||||||
|
|
||||||
|
|
||||||
|
class FImdlp(TransformerMixin, BaseEstimator):
    """Fayyad-Irani MDLP discretization transformer (scikit-learn API).

    The estimator takes no constructor parameters. All state is created
    by :meth:`fit`.

    Attributes
    ----------
    classes_ : array-like
        Class labels found in ``y`` during :meth:`fit`.
    n_classes_ : int
        Number of distinct class labels.
    class_name_ : str
        Name of the target; ``"class"`` unless ``class_name`` is passed
        to :meth:`fit`.
    features_ : list of str
        Feature names; ``feature_<i>`` unless ``features`` is passed to
        :meth:`fit`.
    n_features_ : int
        The number of features of the data passed to :meth:`fit`.
    X_, y_ : array-like
        The validated training data and labels stored by :meth:`fit`.
    discretizer_ : CFImdlp
        Wrapper around the C++ implementation that computes cut points.
    """

    def __init__(self):
        pass

    def _check_params_fit(self, X, y, expected_args, kwargs):
        """Validate ``X``/``y`` and store the fit-time keyword arguments.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The class labels.
        expected_args : list of str
            Names of the keyword arguments accepted by :meth:`fit`.
        kwargs : dict
            Keyword arguments received by :meth:`fit`; each accepted key
            is stored on the instance as ``<key>_``.

        Returns
        -------
        X, y : the arrays returned by ``check_X_y``.

        Raises
        ------
        ValueError
            If an unexpected keyword is given or the number of feature
            names does not match the number of columns in ``X``.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        self.n_classes_ = self.classes_.shape[0]
        # Default values, possibly overridden by kwargs below
        self.class_name_ = "class"
        self.features_ = [f"feature_{i}" for i in range(X.shape[1])]
        for key, value in kwargs.items():
            if key not in expected_args:
                raise ValueError(f"Unexpected argument: {key}")
            setattr(self, f"{key}_", value)
        if len(self.features_) != X.shape[1]:
            raise ValueError(
                "Number of features does not match the number of columns in X"
            )
        return X, y

    def fit(self, X, y, **kwargs):
        """Store the training data and create the discretizer.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The class labels. They are required: the data is validated
            with ``check_X_y`` and the labels are later handed to the
            cut point computation.
        **kwargs
            Optional ``class_name`` (str) and ``features`` (list of
            feature names, one per column of ``X``).

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = self._check_params_fit(
            X, y, expected_args=["class_name", "features"], kwargs=kwargs
        )

        self.n_features_ = X.shape[1]
        self.X_ = X
        self.y_ = y
        self.discretizer_ = CFImdlp()

        return self

    def transform(self, X):
        """Compute and print the cut points of every feature of ``X``.

        Parameters
        ----------
        X : {array-like}, shape (n_samples, n_features)
            The input samples. They must be the same samples that were
            passed to :meth:`fit`.

        Returns
        -------
        X : array, shape (n_samples, n_features)
            The validated input. NOTE(review): the values are currently
            returned unchanged; the cut points are only printed.

        Raises
        ------
        ValueError
            If ``X`` has a different number of features than the data
            seen in :meth:`fit`, or differs from that data.
        """
        # Check that fit has been called
        check_is_fitted(self, "n_features_")

        # Input validation
        X = check_array(X)

        # Check the number of features first, so that a wrongly shaped
        # input is reported as such instead of reaching the element-wise
        # comparison below.
        if X.shape[1] != self.n_features_:
            raise ValueError(
                "Shape of input is different from what was seen in `fit`"
            )
        # array_equal is also False when the number of rows differs.
        if not np.array_equal(X, self.X_):
            raise ValueError(
                "X values are not the same as the ones used to fit the model."
            )

        # NOTE(review): the header text hard-codes the Iris dataset even
        # though any dataset can be passed in.
        print("Cut points for each feature in Iris dataset:")
        for i in range(self.n_features_):
            # Sort the feature values and apply the same permutation to
            # the labels, so that every value keeps its own class label.
            order = np.argsort(X[:, i], kind="stable")
            data = X[order, i]
            labels = self.y_[order]
            Xcutpoints = self.discretizer_.cut_points(data, labels)
            print(f"{self.features_[i]:20s}: {Xcutpoints}")
        return X
|
@@ -37,3 +37,21 @@ classifiers = [
|
|||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Home = "https://github.com/doctorado-ml/FImdlp"
|
Home = "https://github.com/doctorado-ml/FImdlp"
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 79
|
||||||
|
target_version = ['py38', 'py39', 'py310']
|
||||||
|
include = '\.pyi?$'
|
||||||
|
exclude = '''
|
||||||
|
/(
|
||||||
|
\.git
|
||||||
|
| \.hg
|
||||||
|
| \.mypy_cache
|
||||||
|
| \.tox
|
||||||
|
| \.venv
|
||||||
|
| _build
|
||||||
|
| buck-out
|
||||||
|
| build
|
||||||
|
| dist
|
||||||
|
)/
|
||||||
|
'''
|
||||||
|
11
sample.py
11
sample.py
@@ -1,14 +1,9 @@
|
|||||||
import numpy as np
|
|
||||||
from sklearn.datasets import load_iris
|
from sklearn.datasets import load_iris
|
||||||
from fimdlp import CFImdlp
|
from fimdlp.mdlp import FImdlp
|
||||||
|
|
||||||
data = load_iris()
|
data = load_iris()
|
||||||
X = data.data
|
X = data.data
|
||||||
y = data.target
|
y = data.target
|
||||||
features = data.feature_names
|
features = data.feature_names
|
||||||
test = CFImdlp()
|
test = FImdlp()
|
||||||
print("Cut points for each feature in Iris dataset:")
|
Xcutpoints = test.fit(X, y, features=features).transform(X)
|
||||||
for i in range(0, X.shape[1]):
|
|
||||||
data = np.sort(X[:, i])
|
|
||||||
Xcutpoints = test.cut_points(data, y)
|
|
||||||
print(f"{features[i]:20s}: {Xcutpoints}")
|
|
||||||
|
18
setup.py
18
setup.py
@@ -9,24 +9,10 @@ from setuptools import Extension, setup
|
|||||||
setup(
|
setup(
|
||||||
ext_modules=[
|
ext_modules=[
|
||||||
Extension(
|
Extension(
|
||||||
name="fimdlp",
|
name="cppfimdlp",
|
||||||
sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"],
|
sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"],
|
||||||
language="c++",
|
language="c++",
|
||||||
include_dirs=["fimdlp"],
|
include_dirs=["fimdlp"],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
# from Cython.Build import cythonize
|
|
||||||
# setup(
|
|
||||||
# ext_modules=cythonize(
|
|
||||||
# Extension(
|
|
||||||
# "fimdlp",
|
|
||||||
# sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"],
|
|
||||||
# language="c++",
|
|
||||||
# include_dirs=["fimdlp"],
|
|
||||||
# ),
|
|
||||||
# include_path=["./fimdlp"],
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user