diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index e9418f5..e1ed848 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -2,16 +2,9 @@ #include #include #include -#include #include #include "Metrics.h" -namespace mdlp -{ - std::ostream &operator<<(std::ostream &os, const CutPointBody &cut) - { - os << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << "]"; - return os; - } +namespace mdlp { CPPFImdlp::CPPFImdlp() : debug(false), precision(6) { divider = pow(10, precision); @@ -23,70 +16,125 @@ namespace mdlp CPPFImdlp::~CPPFImdlp() { } - std::vector CPPFImdlp::getCutPoints() + std::vector CPPFImdlp::getCutPoints() { return cutPoints; } - std::vector CPPFImdlp::getDiscretizedValues() + labels CPPFImdlp::getDiscretizedValues() { return xDiscretized; } - void CPPFImdlp::fit(std::vector &X, std::vector &y) + void CPPFImdlp::fit(samples& X, labels& y) { this->X = X; this->y = y; this->indices = sortIndices(X); + this->xDiscretized = labels(X.size(), -1); + this->numClasses = Metrics::numClasses(y, indices, 0, X.size()); + computeCutPoints(); filterCutPoints(); applyCutPoints(); } - std::vector &CPPFImdlp::transform(std::vector &X) + labels& CPPFImdlp::transform(samples& X) { - std::vector indices_transform = sortIndices(X); + indices_t indices_transform = sortIndices(X); applyCutPoints(); return xDiscretized; } - void CPPFImdlp::debugPoints(std::vector &X, std::vector &y) + void CPPFImdlp::debugPoints(samples& X, labels& y) { std::cout << "+++++++++++++++++++++++" << std::endl; // for (auto i : sortIndices(X)) - std::vector indices = sortIndices(X); - for (size_t i = 0; i < indices.size(); i++) - { + indices_t indices = sortIndices(X); + for (size_t i = 0; i < indices.size(); i++) { printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]); } std::cout << "+++++++++++++++++++++++" << std::endl; fit(X, y); - for (auto item : cutPoints) - { - std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl; + for (auto item : cutPoints) { + std::cout << item.start << " X[" << item.end << "]=" << X[item.end] << std::endl; } } void CPPFImdlp::applyCutPoints() { + for (auto cut : cutPoints) { + for (size_t i = cut.start; i < cut.end; i++) { + xDiscretized[indices[i]] = cut.classNumber; + } + } } - bool CPPFImdlp::evaluateCutPoint(CutPointBody point) + bool CPPFImdlp::evaluateCutPoint(CutPoint_t rest, CutPoint_t candidate) { - return true; + int k, k1, k2; + float ig, delta; + float ent, ent1, ent2; + float N = float(rest.end - rest.start); + if (N < 2) { + return false; + } + + k = Metrics::numClasses(y, indices, rest.start, rest.end); + k1 = Metrics::numClasses(y, indices, rest.start, candidate.end); + k2 = Metrics::numClasses(y, indices, candidate.end, rest.end); + ent = Metrics::entropy(y, indices, rest.start, rest.end, numClasses); + ent1 = Metrics::entropy(y, indices, rest.start, candidate.end, numClasses); + ent2 = Metrics::entropy(y, indices, candidate.end, rest.end, numClasses); + ig = Metrics::informationGain(y, indices, rest.start, rest.end, candidate.end, numClasses); + delta = log2(pow(3, k) - 2) - (k * ent - k1 * ent1 - k2 * ent2); + float term = 1 / N * (log2(N - 1) + delta); + std::cout << candidate + std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl; + std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl; + return (ig > term); } void CPPFImdlp::filterCutPoints() { - std::vector filtered; - for (auto item : cutPoints) - { - if (evaluateCutPoint(item)) - { + std::vector filtered; + CutPoint_t rest; + int classNumber = 0; + + rest.start = 0; + rest.end = X.size(); + rest.fromValue = std::numeric_limits::lowest(); + rest.toValue = std::numeric_limits::max(); + rest.classNumber = classNumber; + bool lastReject = false, first = true; + for (auto item : cutPoints) { + if (evaluateCutPoint(rest, item)) { + std::cout << "Accepted" << std::endl; + if (lastReject) { + if (first) { + item.fromValue = std::numeric_limits::lowest(); + item.start = indices[0]; + } else { + item.fromValue = filtered.back().toValue; + item.start = filtered.back().end; + } + } + //Assign class number to the interval (cutpoint) + item.classNumber = classNumber++; filtered.push_back(item); + first = false; + } else { + std::cout << "Rejected" << std::endl; + lastReject = true; } } + if (!first) + filtered.back().toValue = std::numeric_limits::max(); + else { + filtered.push_back(rest); + } + cutPoints = filtered; } void CPPFImdlp::computeCutPoints() { - std::vector cutPts; - CutPointBody cutPoint; - std::vector cutIdx; + std::vector cutPts; + CutPoint_t cutPoint; + indices_t cutIdx; float xPrev, xCur, xPivot; int yPrev, yCur, yPivot; size_t idxPrev, idxPivot, idx, numElements, start; @@ -99,28 +147,25 @@ namespace mdlp bool firstCutPoint = true; if (debug) printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements); - while (idx < numElements) - { + while (idx < numElements) { xPivot = xCur; yPivot = yCur; idxPivot = indices[idx]; if (debug) printf(" Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur); // Read the same values and check class changes - do - { + do { idx++; xCur = X[indices[idx]]; yCur = y[indices[idx]]; - if (yCur != yPivot && xCur == xPivot) - { + if (yCur != yPivot && xCur == xPivot) { yPivot = -1; } if (debug) printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur); - } while (idx < numElements && xCur == xPivot); - if (yPivot == -1 || yPrev != yCur) - { + } + while (idx < numElements && xCur == xPivot); + if (yPivot == -1 || yPrev != yCur) { cutPoint.start = start; cutPoint.end = idx - 1; start = idx; @@ -128,8 +173,7 @@ namespace mdlp cutPoint.toValue = (xPrev + xCur) / 2; cutPoint.classNumber = -1; firstCutPoint = false; - if (debug) - { + if (debug) { printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); } cutPts.push_back(cutPoint); @@ -139,8 +183,7 @@ namespace mdlp xPrev = xPivot; idxPrev = indices[idxPivot]; } - if (idx == numElements) - { + if (idx == numElements) { cutPoint.start = start; cutPoint.end = numElements; cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; @@ -155,31 +198,27 @@ namespace mdlp } void CPPFImdlp::computeCutPointsAnt() { - std::vector cutPts; - std::vector cutIdx; + samples cutPts; + labels cutIdx; float xPrev, cutPoint; int yPrev; size_t idxPrev; xPrev = X.at(indices[0]); yPrev = y.at(indices[0]); idxPrev = indices[0]; - if (debug) - { + if (debug) { std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl; } - for (auto index = indices.begin(); index != indices.end(); ++index) - { + for (auto index = indices.begin(); index != indices.end(); ++index) { // Definition 2 Cut points are always on boundaries - if (y.at(*index) != yPrev && xPrev < X.at(*index)) - { + if (y.at(*index) != yPrev && xPrev < X.at(*index)) { cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider; - if (debug) - { + if (debug) { std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev; std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" - << ((X.at(*index) + xPrev) / 2) << "idxPrev" - << idxPrev << std::endl; + << ((X.at(*index) + xPrev) / 2) << "idxPrev" + << idxPrev << std::endl; } cutPts.push_back(cutPoint); cutIdx.push_back(idxPrev); @@ -191,13 +230,13 @@ namespace mdlp // cutPoints = cutPts; } // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes - std::vector CPPFImdlp::sortIndices(std::vector &X) + indices_t CPPFImdlp::sortIndices(samples& X) { - std::vector idx(X.size()); + indices_t idx(X.size()); std::iota(idx.begin(), idx.end(), 0); for (std::size_t i = 0; i < X.size(); i++) stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2) - { return X[i1] < X[i2]; }); + { return X[i1] < X[i2]; }); return idx; } } diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h index 8897fbc..f0ebed9 100644 --- a/fimdlp/CPPFImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -1,44 +1,37 @@ #ifndef CPPFIMDLP_H #define CPPFIMDLP_H -#include +#include "typesFImdlp.h" #include -namespace mdlp -{ - struct CutPointBody - { - size_t start, end; // indices of the sorted vector - int classNumber; // class assigned to the cut point - float fromValue, toValue; // Values of the variable - }; - class CPPFImdlp - { +namespace mdlp { + class CPPFImdlp { private: bool debug; int precision; float divider; - std::vector indices; // sorted indices to use with X and y - std::vector X; - std::vector y; - std::vector xDiscretized; - std::vector cutPoints; + indices_t indices; // sorted indices to use with X and y + samples X; + labels y; + labels xDiscretized; + int numClasses; + std::vector cutPoints; protected: - std::vector sortIndices(std::vector &); - bool evaluateCutPoint(CutPointBody); - void filterCutPoints(); - void computeCutPoints(); - void applyCutPoints(); + indices_t sortIndices(samples&); void computeCutPointsAnt(); + void computeCutPoints(); + bool evaluateCutPoint(CutPoint_t, CutPoint_t); + void filterCutPoints(); + void applyCutPoints(); public: CPPFImdlp(); CPPFImdlp(int, bool debug = false); ~CPPFImdlp(); - std::vector getCutPoints(); - std::vector getDiscretizedValues(); - void debugPoints(std::vector &, std::vector &); - void fit(std::vector &, std::vector &); - std::vector &transform(std::vector &); + std::vector getCutPoints(); + labels getDiscretizedValues(); + void debugPoints(samples&, labels&); + void fit(samples&, labels&); + labels& transform(samples&); }; } #endif \ No newline at end of file diff --git a/fimdlp/Metrics.cpp b/fimdlp/Metrics.cpp index 96d5596..7f9b060 100644 --- a/fimdlp/Metrics.cpp +++ b/fimdlp/Metrics.cpp @@ -1,40 +1,35 @@ #include "Metrics.h" #include -namespace mdlp -{ +namespace mdlp { Metrics::Metrics() { } - int Metrics::numClasses(std::vector &y, std::vector indices, size_t start, size_t end) + int Metrics::numClasses(labels& y, indices_t indices, size_t start, size_t end) { std::set numClasses; - for (auto i = start; i < end; ++i) - { + for (auto i = start; i < end; ++i) { numClasses.insert(y[indices[i]]); } return numClasses.size(); } - float Metrics::entropy(std::vector &y, std::vector &indices, size_t start, size_t end, int nClasses) + float Metrics::entropy(labels& y, indices_t& indices, size_t start, size_t end, int nClasses) { float entropy = 0; int nElements = 0; - std::vector counts(nClasses + 1, 0); - for (auto i = &indices[start]; i != &indices[end]; ++i) - { + labels counts(nClasses + 1, 0); + for (auto i = &indices[start]; i != &indices[end]; ++i) { counts[y[*i]]++; nElements++; } - for (auto count : counts) - { - if (count > 0) - { + for (auto count : counts) { + if (count > 0) { float p = (float)count / nElements; entropy -= p * log2(p); } } return entropy; } - float Metrics::informationGain(std::vector &y, std::vector &indices, size_t start, size_t end, size_t cutPoint, int nClasses) + float Metrics::informationGain(labels& y, indices_t& indices, size_t start, size_t end, size_t cutPoint, int nClasses) { float iGain = 0.0; float entropy, entropyLeft, entropyRight; diff --git a/fimdlp/Metrics.h b/fimdlp/Metrics.h index e3d4344..41b9b2c 100644 --- a/fimdlp/Metrics.h +++ b/fimdlp/Metrics.h @@ -1,16 +1,14 @@ #ifndef METRICS_H #define METRICS_H -#include +#include "typesFImdlp.h" #include -namespace mdlp -{ - class Metrics - { +namespace mdlp { + class Metrics { public: Metrics(); - static int numClasses(std::vector &, std::vector, size_t, size_t); - static float entropy(std::vector &, std::vector &, size_t, size_t, int); - static float informationGain(std::vector &, std::vector &, size_t, size_t, size_t, int); + static int numClasses(labels&, indices_t, size_t, size_t); + static float entropy(labels&, indices_t&, size_t, size_t, int); + static float informationGain(labels&, indices_t&, size_t, size_t, size_t, int); }; } #endif \ No newline at end of file diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx index 7aeedf1..ccce80f 100644 --- a/fimdlp/cfimdlp.pyx +++ b/fimdlp/cfimdlp.pyx @@ -12,13 +12,13 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp": CPPFImdlp() except + CPPFImdlp(int, bool) except + void fit(vector[float]&, vector[int]&) - vector[float] transform(vector[float]&) - vector[float] getDiscretizedValues() + vector[int] transform(vector[float]&) + vector[int] getDiscretizedValues() vector[CutPointBody] getCutPoints() void debugPoints(vector[float]&, vector[int]&) -class PCutPointBody: +class PCutPoint_t: def __init__(self, start, end, fromValue, toValue): self.start = start self.end = end @@ -37,7 +37,7 @@ cdef class CFImdlp: return self.thisptr.transform(X) def get_discretized_values(self): return self.thisptr.getDiscretizedValues() - def get_cut_points(self, X, y): + def get_cut_points(self): return self.thisptr.getCutPoints() def debug_points(self, X, y): return self.thisptr.debugPoints(X, y) diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index cc256ee..cccc05b 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/testcpp/FImdlp_unittest.cc b/fimdlp/testcpp/FImdlp_unittest.cc index a78fac3..61d5260 100644 --- a/fimdlp/testcpp/FImdlp_unittest.cc +++ b/fimdlp/testcpp/FImdlp_unittest.cc @@ -1,39 +1,76 @@ #include "gtest/gtest.h" +#include "../Metrics.h" #include "../CPPFImdlp.h" -namespace -{ - float precision = 0.000001; - class TestMetrics : protected mdlp::CPPFImdlp - { +namespace mdlp { + class TestMetrics : public CPPFImdlp, public testing::Test { public: - std::vector testSort(std::vector &X) + //TestMetrics(samples X, labels y, indices_t indices) : X(X), y(y), indices(indices), CPPFImdlp(true) {} + indices_t indices; // sorted indices to use with X and y + samples X; + labels y; + samples xDiscretized; + int numClasses; + float precision_test = 0.000001; + void SetUp() override { - return sortIndices(X); + X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; + indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; + y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; + numClasses = 2; + } + void check_sorted_vector(samples& X, indices_t indices) + { + this->X = X; + this->indices = indices; + indices_t testSortedIndices = sortIndices(X); + float prev = X[testSortedIndices[0]]; + for (auto i = 0; i < X.size(); ++i) { + EXPECT_EQ(testSortedIndices[i], indices[i]); + EXPECT_LE(prev, X[testSortedIndices[i]]); + prev = X[testSortedIndices[i]]; + } + } + std::vector testCutPoints(samples& X, indices_t& indices, labels& y) + { + this->X = X; + this->y = y; + this->indices = indices; + this->numClasses = Metrics::numClasses(y, indices, 0, X.size()); + + //computeCutPoints(); + return getCutPoints(); } }; - void check_sorted_vector(std::vector &X, std::vector indices) + // + TEST_F(TestMetrics, SortIndices) { - TestMetrics testClass = TestMetrics(); - std::vector testSortedIndices = testClass.testSort(X); - float prev = X[testSortedIndices[0]]; - for (auto i = 0; i < X.size(); ++i) - { - EXPECT_EQ(testSortedIndices[i], indices[i]); - EXPECT_LE(prev, X[testSortedIndices[i]]); - prev = X[testSortedIndices[i]]; + samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; + indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; + check_sorted_vector(X, indices); + X = { 5.77, 5.88, 5.99 }; + indices = { 0, 1, 2 }; + check_sorted_vector(X, indices); + X = { 5.33, 5.22, 5.11 }; + indices = { 2, 1, 0 }; + check_sorted_vector(X, indices); + } + // TEST_F(TestMetrics, EvaluateCutPoint) + // { + // CutPoint_t rest, candidate; + // rest.start = 0; + // rest.end = 10; + // candidate.start = 0; + // candidate.end = 5; + // float computed = evaluateCutPoint(rest, candidate); + // ASSERT_NEAR(0.468996, computed, precision_test); + // } + TEST_F(TestMetrics, ComputeCutPoints) + { + std::vector computed, expected; + computeCutPoints(); + computed = getCutPoints(); + for (auto cut : computed) { + std::cout << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << ")" << std::endl; } } - TEST(FImdlpTest, SortIndices) - { - - std::vector X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9}; - std::vector indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7}; - check_sorted_vector(X, indices); - X = {5.77, 5.88, 5.99}; - indices = {0, 1, 2}; - check_sorted_vector(X, indices); - X = {5.33, 5.22, 5.11}; - indices = {2, 1, 0}; - check_sorted_vector(X, indices); - } } \ No newline at end of file diff --git a/fimdlp/testcpp/Metrics_unittest.cc b/fimdlp/testcpp/Metrics_unittest.cc index 3c25fe2..c04ec0f 100644 --- a/fimdlp/testcpp/Metrics_unittest.cc +++ b/fimdlp/testcpp/Metrics_unittest.cc @@ -1,33 +1,31 @@ #include "gtest/gtest.h" #include "../Metrics.h" -namespace -{ - +namespace mdlp { float precision = 0.000001; TEST(MetricTest, NumClasses) { - std::vector y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; - std::vector indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - EXPECT_EQ(1, mdlp::Metrics::numClasses(y, indices, 4, 8)); - EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 0, 10)); - EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 8, 10)); + labels y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 }; + indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + EXPECT_EQ(1, Metrics::numClasses(y, indices, 4, 8)); + EXPECT_EQ(2, Metrics::numClasses(y, indices, 0, 10)); + EXPECT_EQ(2, Metrics::numClasses(y, indices, 8, 10)); } TEST(MetricTest, Entropy) { - std::vector y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; - std::vector indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - EXPECT_EQ(1, mdlp::Metrics::entropy(y, indices, 0, 10, 2)); - EXPECT_EQ(0, mdlp::Metrics::entropy(y, indices, 0, 5, 1)); - std::vector yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; - ASSERT_NEAR(0.468996, mdlp::Metrics::entropy(yz, indices, 0, 10, 2), precision); + labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; + indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + EXPECT_EQ(1, Metrics::entropy(y, indices, 0, 10, 2)); + EXPECT_EQ(0, Metrics::entropy(y, indices, 0, 5, 1)); + labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 }; + ASSERT_NEAR(0.468996, Metrics::entropy(yz, indices, 0, 10, 2), precision); } TEST(MetricTest, InformationGain) { - std::vector y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; - std::vector indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; - ASSERT_NEAR(1, mdlp::Metrics::informationGain(y, indices, 0, 10, 5, 2), precision); - ASSERT_NEAR(0.108032, mdlp::Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision); + labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; + indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 }; + ASSERT_NEAR(1, Metrics::informationGain(y, indices, 0, 10, 5, 2), precision); + ASSERT_NEAR(0.108032, Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision); } } \ No newline at end of file diff --git a/fimdlp/testcpp/test b/fimdlp/testcpp/test new file mode 100755 index 0000000..e27cdde --- /dev/null +++ b/fimdlp/testcpp/test @@ -0,0 +1,12 @@ +cmake -S . -B build -Wno-dev +if test $? -ne 0; then + echo "Error in creating build commands." + exit 1 +fi +cmake --build build +if test $? -ne 0; then + echo "Error in build command." + exit 1 +fi +cd build +ctest --output-on-failure diff --git a/fimdlp/testcpp/test.sh b/fimdlp/testcpp/test.sh index 7c0aa40..e27cdde 100755 --- a/fimdlp/testcpp/test.sh +++ b/fimdlp/testcpp/test.sh @@ -1,4 +1,4 @@ -cmake -S . -B build +cmake -S . -B build -Wno-dev if test $? -ne 0; then echo "Error in creating build commands." exit 1 diff --git a/fimdlp/typesFImdlp.h b/fimdlp/typesFImdlp.h new file mode 100644 index 0000000..7214ce9 --- /dev/null +++ b/fimdlp/typesFImdlp.h @@ -0,0 +1,15 @@ +#ifndef TYPES_H +#define TYPES_H +#include +namespace mdlp { + typedef std::vector samples; + typedef std::vector labels; + typedef std::vector indices_t; + struct CutPointBody { + size_t start, end; // indices of the sorted vector + int classNumber; // class assigned to the cut point + float fromValue, toValue; + }; + typedef CutPointBody CutPoint_t; +} +#endif \ No newline at end of file diff --git a/prueba/FImdlp.cpp b/prueba/FImdlp.cpp index 19241c7..68c2f69 100644 --- a/prueba/FImdlp.cpp +++ b/prueba/FImdlp.cpp @@ -1,21 +1,18 @@ #include "FImdlp.h" -namespace FImdlp -{ +namespace FImdlp { FImdlp::FImdlp() { } FImdlp::~FImdlp() { } - std::vector FImdlp::cutPoints(std::vector &X, std::vector &y) + samples FImdlp::cutPoints(labels& X, labels& y) { - std::vector cutPts; + samples cutPts; int i, ant = X.at(0); int n = X.size(); - for (i = 1; i < n; i++) - { - if (X.at(i) != ant) - { + for (i = 1; i < n; i++) { + if (X.at(i) != ant) { cutPts.push_back(float(X.at(i) + ant) / 2); ant = X.at(i); } diff --git a/prueba/FImdlp.h b/prueba/FImdlp.h index d15cf8b..90c90ca 100644 --- a/prueba/FImdlp.h +++ b/prueba/FImdlp.h @@ -2,14 +2,12 @@ #define FIMDLP_H #include #include -namespace FImdlp -{ - class FImdlp - { +namespace FImdlp { + class FImdlp { public: FImdlp(); ~FImdlp(); - std::vector cutPoints(std::vector &, std::vector &); + samples cutPoints(labels&, labels&); }; } #endif \ No newline at end of file diff --git a/sample.py b/sample.py index 804cfc8..fd969bb 100644 --- a/sample.py +++ b/sample.py @@ -18,13 +18,22 @@ test = CFImdlp(debug=False) # k = test.cut_points_ant(X[:, 0], y) # print(k) # test.debug_points(X[:, 0], y) -result = test.cut_points(X[:, 0], y) +X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9] +indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7] +y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] +# test.fit(X[:, 0], y) +test.fit(X, y) +result = test.get_cut_points() for item in result: print( f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" ) - +print(test.get_discretized_values()) +# print(test.transform(X)) +# print(X) +# print(indices) +# print(np.array(X)[indices]) # X = np.array( # [