mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Build sklearn transformer
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,7 +4,7 @@ __pycache__/
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
build/**/*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
|
@@ -1 +1 @@
|
||||
include fimdlp/FImdlp.h
|
||||
include fimdlp/CPPFImdlp.h
|
||||
|
6
Makefile
6
Makefile
@@ -16,9 +16,13 @@ push: ## Push code with tags
|
||||
build: ## Build package
|
||||
rm -fr dist/*
|
||||
rm -fr build/*
|
||||
#python setup.py build_ext
|
||||
python -m build
|
||||
|
||||
buildext: ## Build extension
|
||||
rm -fr dist/*
|
||||
rm -fr build/*
|
||||
python setup.py build_ext
|
||||
|
||||
audit: ## Audit pip
|
||||
pip-audit
|
||||
|
||||
|
@@ -1,13 +1,13 @@
|
||||
#include "FImdlp.h"
|
||||
namespace FImdlp
|
||||
#include "CPPFImdlp.h"
|
||||
namespace CPPFImdlp
|
||||
{
|
||||
FImdlp::FImdlp()
|
||||
CPPFImdlp::CPPFImdlp()
|
||||
{
|
||||
}
|
||||
FImdlp::~FImdlp()
|
||||
CPPFImdlp::~CPPFImdlp()
|
||||
{
|
||||
}
|
||||
std::vector<float> FImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
|
||||
std::vector<float> CPPFImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
|
||||
{
|
||||
std::vector<float> cutPts;
|
||||
int i, ant = X.at(0);
|
@@ -1,14 +1,14 @@
|
||||
#ifndef FIMDLP_H
|
||||
#define FIMDLP_H
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
#include <vector>
|
||||
#include <Python.h>
|
||||
namespace FImdlp
|
||||
namespace CPPFImdlp
|
||||
{
|
||||
class FImdlp
|
||||
class CPPFImdlp
|
||||
{
|
||||
public:
|
||||
FImdlp();
|
||||
~FImdlp();
|
||||
CPPFImdlp();
|
||||
~CPPFImdlp();
|
||||
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
|
||||
};
|
||||
}
|
@@ -1 +1,3 @@
|
||||
from ._version import __version__
|
||||
from ._version import __version__
|
||||
|
||||
all = ["FImdlp", "__version__"]
|
||||
|
@@ -2,15 +2,15 @@
|
||||
# cython: language_level = 3
|
||||
from libcpp.vector cimport vector
|
||||
|
||||
cdef extern from "FImdlp.h" namespace "FImdlp":
|
||||
cdef cppclass FImdlp:
|
||||
FImdlp() except +
|
||||
cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
|
||||
cdef cppclass CPPFImdlp:
|
||||
CPPFImdlp() except +
|
||||
vector[float] cutPoints(vector[int]&, vector[int]&)
|
||||
|
||||
cdef class CFImdlp:
|
||||
cdef FImdlp *thisptr
|
||||
cdef CPPFImdlp *thisptr
|
||||
def __cinit__(self):
|
||||
self.thisptr = new FImdlp()
|
||||
self.thisptr = new CPPFImdlp()
|
||||
def __dealloc__(self):
|
||||
del self.thisptr
|
||||
def cut_points(self, X, y):
|
||||
|
BIN
fimdlp/cppfimdlp.cpython-310-darwin.so
Executable file
BIN
fimdlp/cppfimdlp.cpython-310-darwin.so
Executable file
Binary file not shown.
103
fimdlp/mdlp.py
Normal file
103
fimdlp/mdlp.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
from .cppfimdlp import CFImdlp
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
from sklearn.utils.multiclass import unique_labels
|
||||
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
||||
|
||||
|
||||
class FImdlp(TransformerMixin, BaseEstimator):
    """Fayyad - Irani MDLP discretization algorithm.

    Scikit-learn style transformer wrapping the compiled ``CFImdlp``
    discretizer. The estimator takes no constructor parameters.

    Attributes
    ----------
    n_features_ : int
        The number of features of the data passed to :meth:`fit`.
    classes_ : ndarray
        The labels found in ``y`` during :meth:`fit`.
    n_classes_ : int
        The number of entries in ``classes_``.
    class_name_ : str
        Name of the target; ``"class"`` unless overridden through the
        ``class_name`` keyword of :meth:`fit`.
    features_ : list
        Feature names; ``feature_<i>`` unless overridden through the
        ``features`` keyword of :meth:`fit`.
    X_, y_ : ndarray
        The validated training data stored by :meth:`fit`.
    discretizer_ : CFImdlp
        The compiled discretizer instance created by :meth:`fit`.
    """

    def __init__(self):
        pass

    def _check_params_fit(self, X, y, expected_args, kwargs):
        """Validate ``X``/``y`` and store the fit-time metadata.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values.
        expected_args : list of str
            Keyword names accepted in ``kwargs``.
        kwargs : dict
            Extra keyword arguments received by :meth:`fit`; each accepted
            key ``k`` is stored on the instance as attribute ``k_``.

        Returns
        -------
        X, y : ndarray
            The arrays as returned by ``check_X_y``.

        Raises
        ------
        ValueError
            If ``kwargs`` holds a key not listed in ``expected_args`` or if
            the number of feature names differs from the columns of ``X``.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        self.n_classes_ = self.classes_.shape[0]
        # Default values, possibly overridden by the keyword arguments
        self.class_name_ = "class"
        self.features_ = [f"feature_{i}" for i in range(X.shape[1])]
        for key, value in kwargs.items():
            if key not in expected_args:
                raise ValueError(f"Unexpected argument: {key}")
            setattr(self, f"{key}_", value)
        if len(self.features_) != X.shape[1]:
            raise ValueError(
                "Number of features does not match the number of columns in X"
            )
        return X, y

    def fit(self, X, y, **kwargs):
        """Validate and store the training data and create the discretizer.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The class labels of the samples; they are handed to the
            discretizer together with each feature in :meth:`transform`.
        **kwargs
            Optional ``class_name`` (name of the target) and ``features``
            (one name per column of ``X``).

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If an unexpected keyword is given or the number of feature
            names does not match the number of columns of ``X``.
        """
        X, y = self._check_params_fit(
            X, y, expected_args=["class_name", "features"], kwargs=kwargs
        )

        self.n_features_ = X.shape[1]
        self.X_ = X
        self.y_ = y
        self.discretizer_ = CFImdlp()

        return self

    def transform(self, X):
        """Compute and print the cut points of every feature of ``X``.

        Parameters
        ----------
        X : {array-like}, shape (n_samples, n_features)
            The input samples. They must be the same values that were
            passed to :meth:`fit`.

        Returns
        -------
        X_transformed : array, shape (n_samples, n_features)
            The validated input. The values are currently returned
            unchanged; only the cut points are computed and printed.

        Raises
        ------
        ValueError
            If the number of columns differs from the one seen in
            :meth:`fit` or the values differ from the fitted ones.
        """
        # Check that fit has been called
        check_is_fitted(self, "n_features_")

        # Input validation
        X = check_array(X)

        # The column count is verified first: comparing arrays of
        # incompatible shapes would fail before this error was reached.
        if X.shape[1] != self.n_features_:
            raise ValueError(
                "Shape of input is different from what was seen in `fit`"
            )
        if X.shape != self.X_.shape or (X != self.X_).any():
            raise ValueError(
                "X values are not the same as the ones used to fit the model."
            )

        # NOTE(review): the header mentions Iris whatever the data is;
        # kept unchanged because it is part of the emitted output.
        print("Cut points for each feature in Iris dataset:")
        for i in range(self.n_features_):
            # Reorder the labels with the same permutation that sorts the
            # feature so each label still belongs to its sample.
            # NOTE(review): presumes cut_points expects y aligned with the
            # sorted X - confirm against the C++ implementation.
            order = np.argsort(X[:, i], kind="stable")
            data = X[order, i]
            labels = self.y_[order]
            Xcutpoints = self.discretizer_.cut_points(data, labels)
            print(f"{self.features_[i]:20s}: {Xcutpoints}")
        return X
|
@@ -37,3 +37,21 @@ classifiers = [
|
||||
|
||||
[project.urls]
|
||||
Home = "https://github.com/doctorado-ml/FImdlp"
|
||||
|
||||
[tool.black]
|
||||
line-length = 79
|
||||
target_version = ['py38', 'py39', 'py310']
|
||||
include = '\.pyi?$'
|
||||
exclude = '''
|
||||
/(
|
||||
\.git
|
||||
| \.hg
|
||||
| \.mypy_cache
|
||||
| \.tox
|
||||
| \.venv
|
||||
| _build
|
||||
| buck-out
|
||||
| build
|
||||
| dist
|
||||
)/
|
||||
'''
|
||||
|
11
sample.py
11
sample.py
@@ -1,14 +1,9 @@
|
||||
import numpy as np
|
||||
from sklearn.datasets import load_iris
|
||||
from fimdlp import CFImdlp
|
||||
from fimdlp.mdlp import FImdlp
|
||||
|
||||
data = load_iris()
|
||||
X = data.data
|
||||
y = data.target
|
||||
features = data.feature_names
|
||||
test = CFImdlp()
|
||||
print("Cut points for each feature in Iris dataset:")
|
||||
for i in range(0, X.shape[1]):
|
||||
data = np.sort(X[:, i])
|
||||
Xcutpoints = test.cut_points(data, y)
|
||||
print(f"{features[i]:20s}: {Xcutpoints}")
|
||||
test = FImdlp()
|
||||
Xcutpoints = test.fit(X, y, features=features).transform(X)
|
||||
|
18
setup.py
18
setup.py
@@ -9,24 +9,10 @@ from setuptools import Extension, setup
|
||||
setup(
|
||||
ext_modules=[
|
||||
Extension(
|
||||
name="fimdlp",
|
||||
sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"],
|
||||
name="cppfimdlp",
|
||||
sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"],
|
||||
language="c++",
|
||||
include_dirs=["fimdlp"],
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
# from Cython.Build import cythonize
|
||||
# setup(
|
||||
# ext_modules=cythonize(
|
||||
# Extension(
|
||||
# "fimdlp",
|
||||
# sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"],
|
||||
# language="c++",
|
||||
# include_dirs=["fimdlp"],
|
||||
# ),
|
||||
# include_path=["./fimdlp"],
|
||||
# )
|
||||
# )
|
||||
|
||||
|
Reference in New Issue
Block a user