Implement CutPoints 2 with points of view

2025-08-17 08:25:51 +00:00 · 2022-11-27 23:10:56 +01:00
parent 762d01741c
commit 3333adc395
7 changed files with 59 additions and 21 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -1,4 +1,6 @@
 #include "CPPFImdlp.h"
 #include <numeric>
 #include <iostream>
 namespace CPPFImdlp
 {
    CPPFImdlp::CPPFImdlp()
@@ -7,23 +9,38 @@ namespace CPPFImdlp
    CPPFImdlp::~CPPFImdlp()
    {
    }
-    std::vector<float> CPPFImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
+    std::vector<double> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
    {
-        std::vector<float> cutPts;
+        std::vector<double> cutPts;
-        int i, ant = X.at(0), anty = y.at(0);
+        double antx;
-        int n = X.size();
+        // int anty;
-        for (i = 1; i < n; i++)
+        std::vector<size_t> indices = sortIndices(X);
        antx = X.at(indices[0]);
        // anty = y.at(indices[0]);
        for (auto index = indices.begin(); index != indices.end(); ++index)
        {
-            if (X.at(i) != ant)
+            // std::cout << X.at(*index) << " -> " << y.at(*index) << " // ";
            //  Definition 2 Cut points are always on boundaries
            // if (y.at(*index) != anty && antx < X.at(*index))
            //  Weka implementation
            if (antx < X.at(*index))
            {
-                if (y.at(i) != anty)
+                // std::cout << "* (" << X.at(*index) << ", " << antx << ") // ";
-                {
+                cutPts.push_back((X.at(*index) + antx) / 2);
-                    cutPts.push_back(float(X.at(i) + ant) / 2);
+                // anty = y.at(*index);
                    ant = X.at(i);
                    anty = y.at(i);
                }
            }
            antx = X.at(*index);
        }
        // std::cout << std::endl;
        return cutPts;
    }
    /// Builds an index permutation of X ordered by ascending value.
    ///
    /// @param X values to rank; the vector itself is only read, never reordered.
    /// @return a vector of positions into X such that X[result[0]] <= X[result[1]] <= ...;
    ///         positions holding equal values keep their original relative order.
    ///         Empty when X is empty.
    std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
    {
        std::vector<size_t> idx(X.size());
        // Start from the identity permutation 0, 1, ..., n-1.
        std::iota(idx.begin(), idx.end(), 0);
        // A single stable sort is enough: re-sorting an already sorted range
        // with the same comparator leaves it unchanged, so repeating the sort
        // once per element only multiplied the cost by n.
        std::stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
                         { return X[i1] < X[i2]; });
        return idx;
    }
 }
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -2,14 +2,18 @@
 #define CPPFIMDLP_H
 #include <vector>
 #include <Python.h>
 #include <utility>
 namespace CPPFImdlp
 {
    class CPPFImdlp
    {
    private:
        std::vector<size_t> sortIndices(std::vector<float> &);
    public:
        CPPFImdlp();
        ~CPPFImdlp();
-        std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
+        std::vector<double> cutPoints(std::vector<float> &, std::vector<int> &);
    };
 }
 #endif
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -5,7 +5,7 @@ from libcpp.vector cimport vector
 cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
    cdef cppclass CPPFImdlp:
        CPPFImdlp() except + 
-        vector[float] cutPoints(vector[int]&, vector[int]&)
+        vector[double] cutPoints(vector[float]&, vector[int]&)
 cdef class CFImdlp:
    cdef CPPFImdlp *thisptr
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/mdlp.py
+++ b/fimdlp/mdlp.py
@@ -96,8 +96,17 @@ class FImdlp(TransformerMixin, BaseEstimator):
                "Shape of input is different from what was seen" "in `fit`"
            )
        print("Cut points for each feature in Iris dataset:")
-        for i in range(0, self.n_features_):
+        yz = self.y_.copy()
-            data = np.sort(X[:, i])
+        xz = X[:, 0].copy()
-            Xcutpoints = self.discretizer_.cut_points(data, self.y_)
+        xzz = self.discretizer_.sort_vectors(xz, yz)
        print("Xz: ", xz)
        print("Yz: ", yz)
        print("Xzz: ", xzz)
        print("Solución:")
        print("Xz*: ", np.sort(X[:, 0]))
        print("yz*: ", yz[np.argsort(X[:, 0])])
        for i in range(0, 1):  # self.n_features_):
            datax = np.sort(X[:, i])
            Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
            print(f"{self.features_[i]:20s}: {Xcutpoints}")
        return X
--- a/sample.py
+++ b/sample.py
@@ -1,9 +1,13 @@
 from sklearn.datasets import load_iris
 from fimdlp.mdlp import FImdlp
 from fimdlp.cppfimdlp import CFImdlp
 data = load_iris()
 X = data.data
 y = data.target
 features = data.feature_names
-test = FImdlp()
+# test = FImdlp()
-Xcutpoints = test.fit(X, y, features=features).transform(X)
+# Xcutpoints = test.fit(X, y, features=features).transform(X)
 clf = CFImdlp()
 print("Cut points for feature 0 in Iris dataset:")
 print(clf.cut_points(X[:, 0], y))
--- a/setup.py
+++ b/setup.py
@@ -10,9 +10,13 @@ setup(
    ext_modules=[
        Extension(
            name="cppfimdlp",
-            sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"],
+            sources=[
                "fimdlp/cfimdlp.pyx",
                "fimdlp/CPPFImdlp.cpp",
            ],
            language="c++",
            include_dirs=["fimdlp"],
            extra_compile_args=["-std=c++20"],
        ),
    ]
 )