diff --git a/.gitignore b/.gitignore index ceb5580..b3632c1 100644 --- a/.gitignore +++ b/.gitignore @@ -130,4 +130,5 @@ dmypy.json cfimdlp.cpp .vscode/* **/.idea/* - +cmake-build-debug +cmake-build-debug/** diff --git a/fimdlp/CMakeLists.txt b/fimdlp/CMakeLists.txt new file mode 100644 index 0000000..abd9f8d --- /dev/null +++ b/fimdlp/CMakeLists.txt @@ -0,0 +1,183 @@ +cmake_minimum_required(VERSION 3.24) +project(fimdlp) + +set(CMAKE_CXX_STANDARD 14) + +include_directories(.) +include_directories(testcpp/build/_deps/googletest-src/googlemock/include) +include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock) +include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal) +include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom) +include_directories(testcpp/build/_deps/googletest-src/googlemock/test) +include_directories(testcpp/build/_deps/googletest-src/googletest/include) +include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest) +include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal) +include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom) +include_directories(testcpp/build/_deps/googletest-src/googletest/samples) +include_directories(testcpp/build/_deps/googletest-src/googletest/src) +include_directories(testcpp/build/_deps/googletest-src/googletest/test) + +add_executable(fimdlp + testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-matchers.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-port.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-internal-utils.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-port.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-pp.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-actions.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-cardinalities.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-function-mocker.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-matchers.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-actions.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-matchers.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-nice-strict.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-spec-builders.h + testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock.h + testcpp/build/_deps/googletest-src/googlemock/src/gmock-all.cc + testcpp/build/_deps/googletest-src/googlemock/src/gmock-cardinalities.cc + testcpp/build/_deps/googletest-src/googlemock/src/gmock-internal-utils.cc + testcpp/build/_deps/googletest-src/googlemock/src/gmock-matchers.cc + testcpp/build/_deps/googletest-src/googlemock/src/gmock-spec-builders.cc + testcpp/build/_deps/googletest-src/googlemock/src/gmock.cc + testcpp/build/_deps/googletest-src/googlemock/src/gmock_main.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-actions_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-cardinalities_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-function-mocker_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-internal-utils_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-arithmetic_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-comparisons_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-containers_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-misc_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers_test.h + testcpp/build/_deps/googletest-src/googlemock/test/gmock-more-actions_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-nice-strict_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-port_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp-string_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock-spec-builders_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_all_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_ex_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_leak_test_.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_link2_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.h + testcpp/build/_deps/googletest-src/googlemock/test/gmock_output_test_.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_stress_test.cc + testcpp/build/_deps/googletest-src/googlemock/test/gmock_test.cc + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-port.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-printers.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-death-test-internal.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-filepath.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-internal.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-param-util.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port-arch.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-string.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-type-util.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-assertion-result.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-death-test.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-matchers.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-message.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-param-test.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-printers.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-spi.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-test-part.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-typed-test.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_pred_impl.h + testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_prod.h + testcpp/build/_deps/googletest-src/googletest/samples/prime_tables.h + testcpp/build/_deps/googletest-src/googletest/samples/sample1.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample1.h + testcpp/build/_deps/googletest-src/googletest/samples/sample10_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample1_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample2.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample2.h + testcpp/build/_deps/googletest-src/googletest/samples/sample2_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample3-inl.h + testcpp/build/_deps/googletest-src/googletest/samples/sample3_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample4.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample4.h + testcpp/build/_deps/googletest-src/googletest/samples/sample4_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample5_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample6_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample7_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample8_unittest.cc + testcpp/build/_deps/googletest-src/googletest/samples/sample9_unittest.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-all.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-assertion-result.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-death-test.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-filepath.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-internal-inl.h + testcpp/build/_deps/googletest-src/googletest/src/gtest-matchers.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-port.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-printers.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-test-part.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest-typed-test.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest.cc + testcpp/build/_deps/googletest-src/googletest/src/gtest_main.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-break-on-failure-unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-catch-exceptions-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-color-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test_ex_test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-env-var-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-failfast-unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-filepath-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-filter-unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-global-environment-unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-list-tests-unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-listener-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-message-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-options-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-output-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name1-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name2-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.h + testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test2-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-port-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-printers-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-setuptestsuite-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-shuffle-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-test-part-test.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-throw-on-failure-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/googletest-uninitialized-test_.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test2_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.h + testcpp/build/_deps/googletest-src/googletest/test/gtest-unittest-api_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_all_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_assert_by_exception_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_environment_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_help_test_.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_list_output_unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_main_unittest.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_no_test_unittest.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_pred_impl_unittest.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_premature_exit_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_prod_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_repeat_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_in_environment_setup_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_sole_header_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_stress_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_test_macro_stack_footprint_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_testbridge_test_.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_throw_on_failure_ex_test.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_unittest.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile1_test_.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile2_test_.cc + testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_output_unittest_.cc + testcpp/build/_deps/googletest-src/googletest/test/production.cc + testcpp/build/_deps/googletest-src/googletest/test/production.h + testcpp/CMakeLists.txt + testcpp/FImdlp_unittest.cc + testcpp/Metrics_unittest.cc + cfimdlp.cpp + CPPFImdlp.cpp + CPPFImdlp.h + Metrics.cpp + Metrics.h + typesFImdlp.h) diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index 1238d5a..416e101 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -1,7 +1,6 @@ #include "CPPFImdlp.h" #include #include -#include #include #include "Metrics.h" namespace mdlp { @@ -16,14 +15,15 @@ namespace mdlp { CPPFImdlp::CPPFImdlp() : proposed(true), precision(6), debug(false) { divider = pow(10, precision); + numClasses = 0; } CPPFImdlp::CPPFImdlp(bool proposed, int precision, bool debug) : proposed(proposed), precision(precision), debug(debug) { divider = pow(10, precision); + numClasses = 0; } CPPFImdlp::~CPPFImdlp() - { - } + = default; std::vector CPPFImdlp::getCutPoints() { return cutPoints; @@ -32,11 +32,19 @@ namespace mdlp { { return xDiscretized; } - void CPPFImdlp::fit(samples& X, labels& y) + void CPPFImdlp::fit(samples& X_, labels& y_) { - this->X = X; - this->y = y; - this->indices = sortIndices(X); + X = X_; + y = y_; + if (X.size() != y.size()) { + std::cerr << "X and y must have the same size" << std::endl; + return; + } + if (X.size() == 0) { + std::cerr << "X and y must have at least one element" << std::endl; + return; + } + this->indices = sortIndices(X_); this->xDiscretized = labels(X.size(), -1); this->numClasses = Metrics::numClasses(y, indices, 0, X.size()); @@ -48,24 +56,24 @@ namespace mdlp { filterCutPoints(); applyCutPoints(); } - labels& CPPFImdlp::transform(samples& X) + labels& CPPFImdlp::transform(samples& X_) { - indices_t indices_transform = sortIndices(X); + indices_t indices_transform = sortIndices(X_); applyCutPoints(); return xDiscretized; } - void CPPFImdlp::debugPoints(samples& X, labels& y) + void CPPFImdlp::debugPoints(samples& X_, labels& y_) { std::cout << "+++++++++++++++++++++++" << std::endl; // for (auto i : sortIndices(X)) - indices_t indices = sortIndices(X); - for (size_t i = 0; i < indices.size(); i++) { - printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]); + indices_t indices_n = sortIndices(X); + for (size_t i = 0; i < indices_n.size(); i++) { + printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices_n[i], X_[indices_n[i]], y_[indices_n[i]]); } std::cout << "+++++++++++++++++++++++" << std::endl; - fit(X, y); + fit(X_, y_); for (auto item : cutPoints) { - std::cout << item.start << " X[" << item.end << "]=" << X[item.end] << std::endl; + std::cout << item.start << " X_[" << item.end << "]=" << X_[item.end] << std::endl; } } void CPPFImdlp::applyCutPoints() @@ -81,7 +89,7 @@ namespace mdlp { int k, k1, k2; float ig, delta; float ent, ent1, ent2; - float N = float(rest.end - rest.start); + auto N = float(rest.end - rest.start); if (N < 2) { return false; } @@ -92,7 +100,7 @@ namespace mdlp { ent1 = Metrics::entropy(y, indices, rest.start, candidate.end, numClasses); ent2 = Metrics::entropy(y, indices, candidate.end, rest.end, numClasses); ig = Metrics::informationGain(y, indices, rest.start, rest.end, candidate.end, numClasses); - delta = log2(pow(3, k) - 2) - (k * ent - k1 * ent1 - k2 * ent2); + delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2); float term = 1 / N * (log2(N - 1) + delta); if (debug) { std::cout << "Rest: " << rest; @@ -116,7 +124,8 @@ namespace mdlp { bool lastReject = false, first = true; for (auto item : cutPoints) { if (evaluateCutPoint(rest, item)) { - std::cout << "Accepted" << std::endl; + if (debug) + std::cout << "Accepted" << std::endl; if (lastReject) { if (first) { item.fromValue = std::numeric_limits::lowest(); @@ -132,13 +141,14 @@ namespace mdlp { first = false; rest.start = item.end; } else { - std::cout << "Rejected" << std::endl; + if (debug) + std::cout << "Rejected" << std::endl; lastReject = true; } } if (!first) { filtered.back().toValue = std::numeric_limits::max(); - filtered.back().end = X.size(); + filtered.back().end = X.size() - 1; } else { filtered.push_back(rest); } @@ -149,15 +159,13 @@ namespace mdlp { { cutPoints_t cutPts; cutPoint_t cutPoint; - indices_t cutIdx; float xPrev, xCur, xPivot; int yPrev, yCur, yPivot; - size_t idxPrev, idxPivot, idx, numElements, start; + size_t idx, numElements, start; xCur = xPrev = X[indices[0]]; yCur = yPrev = y[indices[0]]; numElements = indices.size() - 1; - idxPrev = indices[0]; idx = start = 0; bool firstCutPoint = true; if (debug) @@ -165,7 +173,6 @@ namespace mdlp { while (idx < numElements) { xPivot = xCur; yPivot = yCur; - idxPivot = indices[idx]; if (debug) printf(" Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur); // Read the same values and check class changes @@ -192,30 +199,29 @@ namespace mdlp { printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); } cutPts.push_back(cutPoint); - cutIdx.push_back(idxPrev); } yPrev = yPivot; xPrev = xPivot; - idxPrev = indices[idxPivot]; } if (idx == numElements) { cutPoint.start = start; - cutPoint.end = numElements; + cutPoint.end = numElements + 1; cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; cutPoint.toValue = std::numeric_limits::max(); cutPoint.classNumber = -1; if (debug) printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); cutPts.push_back(cutPoint); - cutIdx.push_back(idxPrev); } + for (auto cutPt : cutPts) + std::cout << "Cut point: " << cutPt; cutPoints = cutPts; } void CPPFImdlp::computeCutPointsOriginal() { cutPoints_t cutPts; cutPoint_t cutPoint; - float xPrev = std::numeric_limits::lowest(); + float xPrev; int yPrev; bool first = true; // idxPrev is the index of the init instance of the cutPoint @@ -236,40 +242,34 @@ namespace mdlp { cutPoint.end = index; cutPoint.classNumber = -1; cutPoint.toValue = round(divider * (X[idx] + xPrev) / 2) / divider; - if (debug) { - std::cout << "Cut point: " << cutPoint << " //"; - std::cout << X[idx] << " -> " << y[idx] << " yPrev= " - << yPrev << idxPrev << std::endl; - } idxPrev = index; cutPts.push_back(cutPoint); } xPrev = X[idx]; yPrev = y[idx]; } - std::cout << "Came to here" << first << std::endl; if (first) { cutPoint.start = 0; cutPoint.classNumber = -1; cutPoint.fromValue = std::numeric_limits::lowest(); cutPoint.toValue = std::numeric_limits::max(); - cutPoints.push_back(cutPoint); + cutPts.push_back(cutPoint); } else cutPts.back().toValue = std::numeric_limits::max(); cutPts.back().end = X.size(); if (debug) - for (auto cutPoint : cutPts) - std::cout << "Cut point: " << cutPoint << std::endl; + for (auto cutPt : cutPts) + std::cout << "-Cut point: " << cutPt; cutPoints = cutPts; } // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes - indices_t CPPFImdlp::sortIndices(samples& X) + indices_t CPPFImdlp::sortIndices(samples& X_) { - indices_t idx(X.size()); + indices_t idx(X_.size()); std::iota(idx.begin(), idx.end(), 0); - for (std::size_t i = 0; i < X.size(); i++) - stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2) - { return X[i1] < X[i2]; }); + for (std::size_t i = 0; i < X_.size(); i++) + stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2) + { return X_[i1] < X_[i2]; }); return idx; } } diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h index f242eaf..9dc01ad 100644 --- a/fimdlp/CPPFImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -17,7 +17,7 @@ namespace mdlp { cutPoints_t cutPoints; protected: - indices_t sortIndices(samples&); + static indices_t sortIndices(samples&); void computeCutPointsOriginal(); void computeCutPointsProposed(); bool evaluateCutPoint(cutPoint_t, cutPoint_t); diff --git a/fimdlp/Metrics.cpp b/fimdlp/Metrics.cpp index c26751f..6887400 100644 --- a/fimdlp/Metrics.cpp +++ b/fimdlp/Metrics.cpp @@ -2,8 +2,7 @@ #include namespace mdlp { Metrics::Metrics() - { - } + = default; int Metrics::numClasses(labels& y, indices_t indices, size_t start, size_t end) { std::set numClasses; @@ -31,7 +30,7 @@ namespace mdlp { } float Metrics::informationGain(labels& y, indices_t& indices, size_t start, size_t end, size_t cutPoint, int nClasses) { - float iGain = 0.0; + float iGain; float entropy, entropyLeft, entropyRight; int nClassesLeft, nClassesRight; int nElementsLeft = cutPoint - start, nElementsRight = end - cutPoint; diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so new file mode 100755 index 0000000..314839f Binary files /dev/null and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/testcpp/CMakeLists.txt b/fimdlp/testcpp/CMakeLists.txt index 1fa0e3d..87b9741 100644 --- a/fimdlp/testcpp/CMakeLists.txt +++ b/fimdlp/testcpp/CMakeLists.txt @@ -5,6 +5,18 @@ project(FImdlp) set(CMAKE_CXX_STANDARD 14) include(FetchContent) +include_directories( + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googletest + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googletest/include + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googletest/src + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googletest/test/ + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googlemock + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googlemock/include + /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/build/_deps/googletest-src/googlemock/test + /Library/Frameworks/Python.framework/Versions/Current/ +) +#include_directories(/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/googletest-release-1.12.1/googletest/include /Users/rmontanana/Code/FImdlp/fimdlp/testcpp/googletest-release-1.12.1/googletest) FetchContent_Declare( googletest URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip @@ -24,4 +36,3 @@ target_link_libraries(FImdlp_unittest GTest::gtest_main) include(GoogleTest) gtest_discover_tests(Metrics_unittest) gtest_discover_tests(FImdlp_unittest) - diff --git a/fimdlp/testcpp/FImdlp_unittest.cc b/fimdlp/testcpp/FImdlp_unittest.cc index bc7f226..3be0b78 100644 --- a/fimdlp/testcpp/FImdlp_unittest.cc +++ b/fimdlp/testcpp/FImdlp_unittest.cc @@ -4,24 +4,23 @@ namespace mdlp { class TestMetrics : public CPPFImdlp, public testing::Test { public: - //TestMetrics(samples X, labels y, indices_t indices) : X(X), y(y), indices(indices), CPPFImdlp(true) {} + TestMetrics() : CPPFImdlp(true, 6, false) {} indices_t indices; // sorted indices to use with X and y samples X; labels y; samples xDiscretized; int numClasses; float precision_test = 0.000001; - void SetUp() override + void SetUp() { X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; - indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; - numClasses = 2; + fit(X, y); } - void check_sorted_vector(samples& X, indices_t indices) + void check_sorted_vector(samples& X_, indices_t indices_) { - this->X = X; - this->indices = indices; + X = X_; + indices = indices_; indices_t testSortedIndices = sortIndices(X); float prev = X[testSortedIndices[0]]; for (auto i = 0; i < X.size(); ++i) { @@ -30,21 +29,11 @@ namespace mdlp { prev = X[testSortedIndices[i]]; } } - std::vector testCutPoints(samples& X, indices_t& indices, labels& y) - { - this->X = X; - this->y = y; - this->indices = indices; - this->numClasses = Metrics::numClasses(y, indices, 0, X.size()); - - //computeCutPoints(); - return getCutPoints(); - } }; // TEST_F(TestMetrics, SortIndices) { - samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; + X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; check_sorted_vector(X, indices); X = { 5.77, 5.88, 5.99 }; @@ -54,23 +43,97 @@ namespace mdlp { indices = { 2, 1, 0 }; check_sorted_vector(X, indices); } - // TEST_F(TestMetrics, EvaluateCutPoint) - // { - // cutPoint_t rest, candidate; - // rest.start = 0; - // rest.end = 10; - // candidate.start = 0; - // candidate.end = 5; - // float computed = evaluateCutPoint(rest, candidate); - // ASSERT_NEAR(0.468996, computed, precision_test); - // } + TEST_F(TestMetrics, EvaluateCutPoint) + { + cutPoint_t rest, candidate; + rest.start = 0; + rest.end = 10; + rest.classNumber = -1; + rest.fromValue = -1; + rest.toValue = 1000; + candidate.start = 0; + candidate.end = 4; + candidate.fromValue = -1; + candidate.toValue = 5.15; + candidate.classNumber = -1; + EXPECT_FALSE(evaluateCutPoint(rest, candidate)); + } TEST_F(TestMetrics, ComputeCutPointsOriginal) { - std::vector computed, expected; + cutPoints_t computed, expected; + expected = { + { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 }, + { 6, 7, -1, 5.45, 5.65 }, { 7, 10, -1, 5.65, 3.4028234663852886e+38 } + }; computeCutPointsOriginal(); computed = getCutPoints(); - for (auto cut : computed) { - std::cout << cut.classNumber << " -> (" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << ")" << std::endl; + EXPECT_EQ(computed.size(), 4); + for (auto i = 0; i < 4; i++) { + EXPECT_EQ(computed[i].start, expected[i].start); + EXPECT_EQ(computed[i].end, expected[i].end); + EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); + EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); + EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); + } + } + TEST_F(TestMetrics, ComputeCutPointsOriginalGCase) + { + cutPoints_t computed, expected; + expected = { + { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 }, + }; + X = {0, 1, 2, 2}; + y = {1, 1, 1, 2}; + fit(X, y); + computeCutPointsOriginal(); + computed = getCutPoints(); + EXPECT_EQ(computed.size(), 1); + for (auto i = 0; i < 1; i++) { + EXPECT_EQ(computed[i].start, expected[i].start); + EXPECT_EQ(computed[i].end, expected[i].end); + EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); + EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); + EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); + } + } + TEST_F(TestMetrics, ComputeCutPointsProposed) + { + cutPoints_t computed, expected; + expected = { + { 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 }, + { 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 }, + { 9, 10, -1, 5.85, 3.4028234663852886e+38 } + }; + computeCutPointsProposed(); + computed = getCutPoints(); + EXPECT_EQ(computed.size(), 5); + for (auto i = 0; i < 5; i++) { + EXPECT_EQ(computed[i].start, expected[i].start); + EXPECT_EQ(computed[i].end, expected[i].end); + EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); + EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); + EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); + } + } + TEST_F(TestMetrics, ComputeCutPointsProposedGCase) + { + cutPoints_t computed, expected; + expected = { + { 0, 2, -1, -3.4028234663852886e+38, 1.5 }, + { 3, 4, -1, 1.5, 3.4028234663852886e+38 } + }; + X = {0, 1, 2, 2}; + y = {1, 1, 1, 2}; + fit(X, y); + computeCutPointsProposed(); + computed = getCutPoints(); + EXPECT_EQ(computed.size(), 2); + for (auto i = 0; i < 1; i++) { + EXPECT_EQ(computed[i].start, expected[i].start); + EXPECT_EQ(computed[i].end, expected[i].end); + EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); + EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); + EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); } } } \ No newline at end of file diff --git a/sample.py b/sample.py index 6f7285d..cf2f926 100644 --- a/sample.py +++ b/sample.py @@ -61,32 +61,34 @@ data = load_iris() X = data.data y = data.target features = data.feature_names -test = FImdlp() -test.fit(X, y, features=features) -test.transform(X) +# test = FImdlp() +# test.fit(X, y, features=features) +# test.transform(X) +# test.get_cut_points() -# test = CFImdlp(debug=False) +test = CFImdlp(debug=True, proposed=False) # # k = test.cut_points(X[:, 0], y) # # print(k) # # k = test.cut_points_ant(X[:, 0], y) # # print(k) # # test.debug_points(X[:, 0], y) -# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9] -# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7] -# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] +X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9] +indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7] +y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] # # To check # indices2 = np.argsort(X) # Xs = np.array(X)[indices2] # ys = np.array(y)[indices2] -# # test.fit(X[:, 0], y) -# test.fit(X, y) -# result = test.get_cut_points() -# for item in result: -# print( -# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" -# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" -# ) -# print(test.get_discretized_values()) + +# test.fit(X[:, 0], y) +test.fit(X, y) +result = test.get_cut_points() +for item in result: + print( + f"*Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" + f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" + ) +print(test.get_discretized_values()) # print(Xs, ys) # print("**********************")