Implement CutPoints 2 with points of view

This commit is contained in:
2022-11-27 23:10:56 +01:00
parent 762d01741c
commit 3333adc395
7 changed files with 59 additions and 21 deletions

View File

@@ -1,4 +1,6 @@
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include <algorithm>
#include <iostream>
#include <numeric>
namespace CPPFImdlp namespace CPPFImdlp
{ {
CPPFImdlp::CPPFImdlp() CPPFImdlp::CPPFImdlp()
@@ -7,23 +9,38 @@ namespace CPPFImdlp
CPPFImdlp::~CPPFImdlp() CPPFImdlp::~CPPFImdlp()
{ {
} }
std::vector<float> CPPFImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y) std::vector<double> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
{ {
std::vector<float> cutPts; std::vector<double> cutPts;
int i, ant = X.at(0), anty = y.at(0); double antx;
int n = X.size(); // int anty;
for (i = 1; i < n; i++) std::vector<size_t> indices = sortIndices(X);
antx = X.at(indices[0]);
// anty = y.at(indices[0]);
for (auto index = indices.begin(); index != indices.end(); ++index)
{ {
if (X.at(i) != ant) // std::cout << X.at(*index) << " -> " << y.at(*index) << " // ";
// Definition 2 Cut points are always on boundaries
// if (y.at(*index) != anty && antx < X.at(*index))
// Weka implementation
if (antx < X.at(*index))
{ {
if (y.at(i) != anty) // std::cout << "* (" << X.at(*index) << ", " << antx << ") // ";
{ cutPts.push_back((X.at(*index) + antx) / 2);
cutPts.push_back(float(X.at(i) + ant) / 2); // anty = y.at(*index);
ant = X.at(i);
anty = y.at(i);
}
} }
antx = X.at(*index);
} }
// std::cout << std::endl;
return cutPts; return cutPts;
} }
// Returns the indices of X ordered by ascending value of X (an "argsort").
//
// The result has X.size() entries; result[k] is the position in X of the
// k-th smallest value. Elements that compare equal keep their original
// relative order because a stable sort is used. X itself is not modified.
// An empty X yields an empty result.
std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
{
    // Start from the identity permutation 0, 1, ..., n-1.
    std::vector<size_t> idx(X.size());
    std::iota(idx.begin(), idx.end(), 0);
    // One stable sort is sufficient: sorting an already sorted range again
    // leaves it unchanged, so repeating the sort once per element only
    // multiplied the cost without affecting the result.
    std::stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
                     { return X[i1] < X[i2]; });
    return idx;
}
} }

View File

@@ -2,14 +2,18 @@
#define CPPFIMDLP_H #define CPPFIMDLP_H
#include <vector> #include <vector>
#include <Python.h> #include <Python.h>
#include <utility>
namespace CPPFImdlp namespace CPPFImdlp
{ {
class CPPFImdlp class CPPFImdlp
{ {
private:
std::vector<size_t> sortIndices(std::vector<float> &);
public: public:
CPPFImdlp(); CPPFImdlp();
~CPPFImdlp(); ~CPPFImdlp();
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &); std::vector<double> cutPoints(std::vector<float> &, std::vector<int> &);
}; };
} }
#endif #endif

View File

@@ -5,7 +5,7 @@ from libcpp.vector cimport vector
cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp": cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
cdef cppclass CPPFImdlp: cdef cppclass CPPFImdlp:
CPPFImdlp() except + CPPFImdlp() except +
vector[float] cutPoints(vector[int]&, vector[int]&) vector[double] cutPoints(vector[float]&, vector[int]&)
cdef class CFImdlp: cdef class CFImdlp:
cdef CPPFImdlp *thisptr cdef CPPFImdlp *thisptr

View File

@@ -96,8 +96,17 @@ class FImdlp(TransformerMixin, BaseEstimator):
"Shape of input is different from what was seen" "in `fit`" "Shape of input is different from what was seen" "in `fit`"
) )
print("Cut points for each feature in Iris dataset:") print("Cut points for each feature in Iris dataset:")
for i in range(0, self.n_features_): yz = self.y_.copy()
data = np.sort(X[:, i]) xz = X[:, 0].copy()
Xcutpoints = self.discretizer_.cut_points(data, self.y_) xzz = self.discretizer_.sort_vectors(xz, yz)
print("Xz: ", xz)
print("Yz: ", yz)
print("Xzz: ", xzz)
print("Solución:")
print("Xz*: ", np.sort(X[:, 0]))
print("yz*: ", yz[np.argsort(X[:, 0])])
for i in range(0, 1): # self.n_features_):
datax = np.sort(X[:, i])
Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
print(f"{self.features_[i]:20s}: {Xcutpoints}") print(f"{self.features_[i]:20s}: {Xcutpoints}")
return X return X

View File

@@ -1,9 +1,13 @@
from sklearn.datasets import load_iris from sklearn.datasets import load_iris
from fimdlp.mdlp import FImdlp from fimdlp.mdlp import FImdlp
from fimdlp.cppfimdlp import CFImdlp
data = load_iris() data = load_iris()
X = data.data X = data.data
y = data.target y = data.target
features = data.feature_names features = data.feature_names
test = FImdlp() # test = FImdlp()
Xcutpoints = test.fit(X, y, features=features).transform(X) # Xcutpoints = test.fit(X, y, features=features).transform(X)
clf = CFImdlp()
print("Cut points for feature 0 in Iris dataset:")
print(clf.cut_points(X[:, 0], y))

View File

@@ -10,9 +10,13 @@ setup(
ext_modules=[ ext_modules=[
Extension( Extension(
name="cppfimdlp", name="cppfimdlp",
sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"], sources=[
"fimdlp/cfimdlp.pyx",
"fimdlp/CPPFImdlp.cpp",
],
language="c++", language="c++",
include_dirs=["fimdlp"], include_dirs=["fimdlp"],
extra_compile_args=["-std=c++20"],
), ),
] ]
) )