diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index 96debe1..2f4dce2 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -5,7 +5,7 @@ #include #include #include "Metrics.h" -namespace CPPFImdlp +namespace mdlp { std::ostream &operator<<(std::ostream &os, const CutPointBody &cut) { @@ -23,6 +23,10 @@ namespace CPPFImdlp CPPFImdlp::~CPPFImdlp() { } + std::vector CPPFImdlp::getCutPoints() + { + return cutPoints; + } void CPPFImdlp::debugPoints(std::vector &X, std::vector &y) { std::cout << "+++++++++++++++++++++++" << std::endl; @@ -33,12 +37,13 @@ namespace CPPFImdlp printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]); } std::cout << "+++++++++++++++++++++++" << std::endl; - for (auto item : cutPoints(X, y)) + computeCutPoints(X, y); + for (auto item : cutPoints) { std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl; } } - std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) + void CPPFImdlp::computeCutPoints(std::vector &X_, std::vector &y_) { std::vector cutPts; @@ -47,7 +52,9 @@ namespace CPPFImdlp float xPrev, xCur, xPivot; int yPrev, yCur, yPivot; size_t idxPrev, idxPivot, idx, numElements, start; - std::vector indices = sortIndices(X); + X = X_; + y = y_; + indices = sortIndices(X); xCur = xPrev = X[indices[0]]; yCur = yPrev = y[indices[0]]; numElements = indices.size() - 1; @@ -79,10 +86,11 @@ namespace CPPFImdlp if (yPivot == -1 || yPrev != yCur) { cutPoint.start = start; - cutPoint.end = idxPrev; + cutPoint.end = idx - 1; start = idx; cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; cutPoint.toValue = (xPrev + xCur) / 2; + cutPoint.classNumber = -1; firstCutPoint = false; if (debug) { @@ -95,20 +103,21 @@ namespace CPPFImdlp xPrev = xPivot; idxPrev = indices[idxPivot]; } - if (idxPrev >= numElements) + if (idx == numElements) { cutPoint.start = start; cutPoint.end = numElements; cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; cutPoint.toValue = std::numeric_limits::max(); + cutPoint.classNumber = -1; if (debug) printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); cutPts.push_back(cutPoint); cutIdx.push_back(idxPrev); } - return cutPts; + cutPoints = cutPts; } - std::vector CPPFImdlp::cutPointsAnt(std::vector &X, std::vector &y) + std::vector CPPFImdlp::computeCutPointsAnt(std::vector &X, std::vector &y) { std::vector cutPts; std::vector cutIdx; diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h index baf5f9a..a60a93a 100644 --- a/fimdlp/CPPFImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -2,12 +2,13 @@ #define CPPFIMDLP_H #include #include -namespace CPPFImdlp +namespace mdlp { struct CutPointBody { - size_t start, end; - float fromValue, toValue; + size_t start, end; // indices of the sorted vector + int classNumber; // class assigned to the cut point + float fromValue, toValue; // Values of the variable }; class CPPFImdlp { @@ -15,15 +16,20 @@ namespace CPPFImdlp bool debug; int precision; float divider; - std::vector - sortIndices(std::vector &); + std::vector indices; // sorted indices to use with X and y + std::vector X; + std::vector y; + std::vector xDiscretized; + std::vector cutPoints; + std::vector sortIndices(std::vector &); public: CPPFImdlp(); CPPFImdlp(int, bool debug = false); ~CPPFImdlp(); - std::vector cutPoints(std::vector &, std::vector &); - std::vector cutPointsAnt(std::vector &, std::vector &); + std::vector getCutPoints(); + void computeCutPoints(std::vector &, std::vector &); + std::vector computeCutPointsAnt(std::vector &, std::vector &); void debugPoints(std::vector &, std::vector &); }; } diff --git a/fimdlp/Metrics.cpp b/fimdlp/Metrics.cpp index 9613bd4..372d4fb 100644 --- a/fimdlp/Metrics.cpp +++ b/fimdlp/Metrics.cpp @@ -1,5 +1,5 @@ #include "Metrics.h" -namespace CPPFImdlp +namespace mdlp { Metrics::Metrics() { diff --git a/fimdlp/Metrics.h b/fimdlp/Metrics.h index 3c8f2a8..e3d4344 100644 --- a/fimdlp/Metrics.h +++ b/fimdlp/Metrics.h @@ -2,7 +2,7 @@ #define METRICS_H #include #include -namespace CPPFImdlp +namespace mdlp { class Metrics { diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx index 84d136e..28161c4 100644 --- a/fimdlp/cfimdlp.pyx +++ b/fimdlp/cfimdlp.pyx @@ -3,13 +3,26 @@ from libcpp.vector cimport vector from libcpp cimport bool -cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp": +cdef extern from "CPPFImdlp.h" namespace "mdlp": + cdef struct CutPointBody: + size_t start, end; + int classNumber; + float fromValue, toValue; cdef cppclass CPPFImdlp: CPPFImdlp() except + CPPFImdlp(int, bool) except + + vector[CutPointBody] getCutPoints() vector[float] cutPointsAnt(vector[float]&, vector[int]&) void debugPoints(vector[float]&, vector[int]&) + void computeCutPoints(vector[float]&, vector[int]&) + +class PCutPointBody: + def __init__(self, start, end, fromValue, toValue): + self.start = start + self.end = end + self.fromValue = fromValue + self.toValue = toValue cdef class CFImdlp: cdef CPPFImdlp *thisptr @@ -17,7 +30,11 @@ cdef class CFImdlp: self.thisptr = new CPPFImdlp(precision, debug) def __dealloc__(self): del self.thisptr + def cut_points(self, X, y): + self.thisptr.computeCutPoints(X, y) + return self.thisptr.getCutPoints() def cut_points_ant(self, X, y): - return self.thisptr.cutPointsAnt(X, y) + return self.get_cut_points(X, y) def debug_points(self, X, y): return self.thisptr.debugPoints(X, y) + \ No newline at end of file diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index c3d0d83..b0a5cc0 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/test b/fimdlp/test index d273a7a..aeaf513 100755 Binary files a/fimdlp/test and b/fimdlp/test differ diff --git a/fimdlp/test.cpp b/fimdlp/test.cpp index 34ce62e..b722339 100644 --- a/fimdlp/test.cpp +++ b/fimdlp/test.cpp @@ -1,17 +1,17 @@ #include "CPPFImdlp.h" #include -using namespace std; +using namespace mdlp; int main(int argc, char *argv[], char *envp[]) { { - CPPFImdlp::CPPFImdlp fimdlp = CPPFImdlp::CPPFImdlp(true); - vector X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - vector y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - vector cutPts = fimdlp.cutPoints(X, y); - for (auto &cutPt : cutPts) + CPPFImdlp fimdlp = CPPFImdlp(true); + std::vector X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + std::vector y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + fimdlp.computeCutPoints(X, y); + for (struct CutPointBody cutPt : fimdlp.getCutPoints()) { - cout << cutPt << endl; + std::cout << cutPt << std::endl; } return 0; } diff --git a/sample.py b/sample.py index d19dfa3..804cfc8 100644 --- a/sample.py +++ b/sample.py @@ -3,6 +3,7 @@ from fimdlp.mdlp import FImdlp from fimdlp.cppfimdlp import CFImdlp import numpy as np + data = load_iris() X = data.data y = data.target @@ -16,7 +17,14 @@ test = CFImdlp(debug=False) # print(k) # k = test.cut_points_ant(X[:, 0], y) # print(k) -test.debug_points(X[:, 0], y) +# test.debug_points(X[:, 0], y) +result = test.cut_points(X[:, 0], y) +for item in result: + print( + f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" + f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" + ) + # X = np.array( # [