Build sklearn transformer

This commit is contained in:
2022-11-27 11:35:21 +01:00
parent 6f4c650af9
commit f843c938fc
12 changed files with 152 additions and 44 deletions

2
.gitignore vendored
View File

@@ -4,7 +4,7 @@ __pycache__/
*$py.class *$py.class
# C extensions # C extensions
*.so build/**/*.so
# Distribution / packaging # Distribution / packaging
.Python .Python

View File

@@ -1 +1 @@
include fimdlp/FImdlp.h include fimdlp/CPPFImdlp.h

View File

@@ -16,9 +16,13 @@ push: ## Push code with tags
build: ## Build package build: ## Build package
rm -fr dist/* rm -fr dist/*
rm -fr build/* rm -fr build/*
#python setup.py build_ext
python -m build python -m build
buildext: ## Build extension
rm -fr dist/*
rm -fr build/*
python setup.py build_ext
audit: ## Audit pip audit: ## Audit pip
pip-audit pip-audit

View File

@@ -1,13 +1,13 @@
#include "FImdlp.h" #include "CPPFImdlp.h"
namespace FImdlp namespace CPPFImdlp
{ {
FImdlp::FImdlp() CPPFImdlp::CPPFImdlp()
{ {
} }
FImdlp::~FImdlp() CPPFImdlp::~CPPFImdlp()
{ {
} }
std::vector<float> FImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y) std::vector<float> CPPFImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
{ {
std::vector<float> cutPts; std::vector<float> cutPts;
int i, ant = X.at(0); int i, ant = X.at(0);

View File

@@ -1,14 +1,14 @@
#ifndef FIMDLP_H #ifndef CPPFIMDLP_H
#define FIMDLP_H #define CPPFIMDLP_H
#include <vector> #include <vector>
#include <Python.h> #include <Python.h>
namespace FImdlp namespace CPPFImdlp
{ {
class FImdlp class CPPFImdlp
{ {
public: public:
FImdlp(); CPPFImdlp();
~FImdlp(); ~CPPFImdlp();
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &); std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
}; };
} }

View File

@@ -1 +1,3 @@
from ._version import __version__ from ._version import __version__
# NOTE(review): this binds a module-level name `all`, which shadows the
# builtin inside this module; the conventional export list is `__all__`.
# "FImdlp" is also not imported in the lines visible here -- confirm it is
# made available in this package before renaming this to `__all__`.
all = ["FImdlp", "__version__"]

View File

@@ -2,15 +2,15 @@
# cython: language_level = 3 # cython: language_level = 3
from libcpp.vector cimport vector from libcpp.vector cimport vector
cdef extern from "FImdlp.h" namespace "FImdlp": cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
cdef cppclass FImdlp: cdef cppclass CPPFImdlp:
FImdlp() except + CPPFImdlp() except +
vector[float] cutPoints(vector[int]&, vector[int]&) vector[float] cutPoints(vector[int]&, vector[int]&)
cdef class CFImdlp: cdef class CFImdlp:
cdef FImdlp *thisptr cdef CPPFImdlp *thisptr
def __cinit__(self): def __cinit__(self):
self.thisptr = new FImdlp() self.thisptr = new CPPFImdlp()
def __dealloc__(self): def __dealloc__(self):
del self.thisptr del self.thisptr
def cut_points(self, X, y): def cut_points(self, X, y):

Binary file not shown.

103
fimdlp/mdlp.py Normal file
View File

@@ -0,0 +1,103 @@
import numpy as np
from .cppfimdlp import CFImdlp
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
class FImdlp(TransformerMixin, BaseEstimator):
    """Fayyad - Irani MDLP discretization algorithm.

    The estimator takes no constructor parameters. Optional metadata
    (``class_name`` and ``features``) is supplied as keyword arguments
    to :meth:`fit`.

    Attributes
    ----------
    n_features_ : int
        The number of features of the data passed to :meth:`fit`.
    classes_ : ndarray
        The labels found in ``y`` during :meth:`fit`.
    n_classes_ : int
        Number of entries in ``classes_``.
    class_name_ : str
        Name of the target; ``"class"`` unless ``class_name`` was given
        to :meth:`fit`.
    features_ : list of str
        Feature names; ``feature_0 .. feature_{n-1}`` unless ``features``
        was given to :meth:`fit`.
    X_, y_ : ndarray
        The validated training data stored by :meth:`fit`.
    discretizer_ : CFImdlp
        The wrapped discretizer instance created by :meth:`fit`.
    """

    def __init__(self):
        pass

    def _check_params_fit(self, X, y, expected_args, kwargs):
        """Validate ``X``/``y`` and store the optional fit arguments.

        Parameters
        ----------
        X, y : array-like
            Training data and labels, validated with ``check_X_y``.
        expected_args : list of str
            Keyword names accepted in ``kwargs``.
        kwargs : dict
            Keyword arguments received by :meth:`fit`. Each accepted key
            is stored on the estimator as ``<key>_``.

        Returns
        -------
        X, y : ndarray
            The validated arrays.

        Raises
        ------
        ValueError
            If ``kwargs`` has a key not listed in ``expected_args`` or if
            the number of feature names differs from ``X.shape[1]``.
        """
        # Reject unknown keywords before touching any attribute so that a
        # failed call does not leave the estimator half-configured.
        for key in kwargs:
            if key not in expected_args:
                raise ValueError(f"Unexpected argument: {key}")
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        self.n_classes_ = self.classes_.shape[0]
        # Default values, possibly overridden by the keyword arguments
        self.class_name_ = "class"
        self.features_ = [f"feature_{i}" for i in range(X.shape[1])]
        for key, value in kwargs.items():
            setattr(self, f"{key}_", value)
        if len(self.features_) != X.shape[1]:
            raise ValueError(
                "Number of features does not match the number of columns in X"
            )
        return X, y

    def fit(self, X, y, **kwargs):
        """Store the training data and create the discretizer.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The class labels. They are required: they are validated
            together with ``X`` and later handed to the discretizer.
        **kwargs
            Optional ``class_name`` and ``features`` (one name per column
            of ``X``). Any other keyword raises ``ValueError``.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = self._check_params_fit(
            X, y, expected_args=["class_name", "features"], kwargs=kwargs
        )
        self.n_features_ = X.shape[1]
        self.X_ = X
        self.y_ = y
        self.discretizer_ = CFImdlp()
        return self

    def transform(self, X):
        """Compute and print the cut points of every feature of ``X``.

        ``X`` must be the same data that was passed to :meth:`fit`,
        because the labels stored during fitting are reused here.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        X : ndarray, shape (n_samples, n_features)
            The validated input. The values are currently returned
            unchanged; the cut points are only printed.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen in
            :meth:`fit` or if ``X`` is not equal to the fitted data.
        """
        # Check is fit had been called
        check_is_fitted(self, "n_features_")
        # Input validation
        X = check_array(X)
        # Check the number of features first so that a mismatch produces
        # the dedicated message instead of a broadcasting failure below.
        if X.shape[1] != self.n_features_:
            raise ValueError(
                "Shape of input is different from what was seen in `fit`"
            )
        # array_equal also copes with a different number of rows, where an
        # element-wise ``!=`` could fail or silently broadcast.
        if not np.array_equal(X, self.X_):
            raise ValueError(
                "X values are not the same as the ones used to fit the model."
            )
        # NOTE(review): the header below names the Iris dataset although
        # the estimator accepts any data -- confirm whether that is wanted.
        print("Cut points for each feature in Iris dataset:")
        for i in range(self.n_features_):
            # NOTE(review): the column is sorted on its own while y_ keeps
            # its original order, so values and labels are presumably no
            # longer aligned -- confirm what cut_points expects.
            data = np.sort(X[:, i])
            Xcutpoints = self.discretizer_.cut_points(data, self.y_)
            print(f"{self.features_[i]:20s}: {Xcutpoints}")
        return X

View File

@@ -37,3 +37,21 @@ classifiers = [
[project.urls] [project.urls]
Home = "https://github.com/doctorado-ml/FImdlp" Home = "https://github.com/doctorado-ml/FImdlp"
[tool.black]
line-length = 79
target_version = ['py38', 'py39', 'py310']
include = '\.pyi?$'
exclude = '''
/(
\.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''

View File

@@ -1,14 +1,9 @@
import numpy as np
from sklearn.datasets import load_iris from sklearn.datasets import load_iris
from fimdlp import CFImdlp from fimdlp.mdlp import FImdlp
data = load_iris() data = load_iris()
X = data.data X = data.data
y = data.target y = data.target
features = data.feature_names features = data.feature_names
test = CFImdlp() test = FImdlp()
print("Cut points for each feature in Iris dataset:") Xcutpoints = test.fit(X, y, features=features).transform(X)
for i in range(0, X.shape[1]):
data = np.sort(X[:, i])
Xcutpoints = test.cut_points(data, y)
print(f"{features[i]:20s}: {Xcutpoints}")

View File

@@ -9,24 +9,10 @@ from setuptools import Extension, setup
setup( setup(
ext_modules=[ ext_modules=[
Extension( Extension(
name="fimdlp", name="cppfimdlp",
sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"], sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"],
language="c++", language="c++",
include_dirs=["fimdlp"], include_dirs=["fimdlp"],
), ),
] ]
) )
# from Cython.Build import cythonize
# setup(
# ext_modules=cythonize(
# Extension(
# "fimdlp",
# sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"],
# language="c++",
# include_dirs=["fimdlp"],
# ),
# include_path=["./fimdlp"],
# )
# )