Fix some errors in CPPFImdlp

This commit is contained in:
2022-12-02 23:25:53 +01:00
parent 97cd2243fa
commit f28465a64f
9 changed files with 355 additions and 96 deletions

3
.gitignore vendored
View File

@@ -130,4 +130,5 @@ dmypy.json
cfimdlp.cpp
.vscode/*
**/.idea/*
cmake-build-debug
cmake-build-debug/**

183
fimdlp/CMakeLists.txt Normal file
View File

@@ -0,0 +1,183 @@
# Build script for the C++ core of FImdlp.
#
# Only the project's own sources belong to this target. GoogleTest is fetched
# and linked by testcpp/CMakeLists.txt, so its headers, sources, samples and
# self-tests must not be listed here:
#   * they define several main() functions (gtest_main.cc, gmock_main.cc and
#     many of the *_test.cc files), so a single target cannot link;
#   * gtest-all.cc / gmock-all.cc already pull in the other library .cc files,
#     which would lead to duplicate symbols;
#   * they live under testcpp/build/_deps, a build directory that does not
#     exist on a fresh checkout.
# testcpp/CMakeLists.txt is a build script, not a source file, and the unit
# tests (testcpp/*_unittest.cc) are built by that script.
cmake_minimum_required(VERSION 3.24)
project(fimdlp LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# NOTE(review): built as a library because none of the project sources listed
# below is known to define main() - confirm this is the intended target type.
# NOTE(review): cfimdlp.cpp is listed in .gitignore (it looks like a generated
# file) and is therefore left out of this target - confirm.
add_library(fimdlp
    CPPFImdlp.cpp
    CPPFImdlp.h
    Metrics.cpp
    Metrics.h
    typesFImdlp.h)

# Replaces the directory-wide include_directories(.): consumers of the target
# inherit the header location automatically.
target_include_directories(fimdlp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

View File

@@ -1,7 +1,6 @@
#include "CPPFImdlp.h"
#include <numeric>
#include <iostream>
#include <iomanip>
#include <algorithm>
#include "Metrics.h"
namespace mdlp {
@@ -16,14 +15,15 @@ namespace mdlp {
CPPFImdlp::CPPFImdlp() : proposed(true), precision(6), debug(false)
{
divider = pow(10, precision);
numClasses = 0;
}
CPPFImdlp::CPPFImdlp(bool proposed, int precision, bool debug) : proposed(proposed), precision(precision), debug(debug)
{
divider = pow(10, precision);
numClasses = 0;
}
CPPFImdlp::~CPPFImdlp()
{
}
= default;
std::vector<cutPoint_t> CPPFImdlp::getCutPoints()
{
return cutPoints;
@@ -32,11 +32,19 @@ namespace mdlp {
{
return xDiscretized;
}
void CPPFImdlp::fit(samples& X, labels& y)
void CPPFImdlp::fit(samples& X_, labels& y_)
{
this->X = X;
this->y = y;
this->indices = sortIndices(X);
X = X_;
y = y_;
if (X.size() != y.size()) {
std::cerr << "X and y must have the same size" << std::endl;
return;
}
if (X.size() == 0) {
std::cerr << "X and y must have at least one element" << std::endl;
return;
}
this->indices = sortIndices(X_);
this->xDiscretized = labels(X.size(), -1);
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
@@ -48,24 +56,24 @@ namespace mdlp {
filterCutPoints();
applyCutPoints();
}
labels& CPPFImdlp::transform(samples& X)
labels& CPPFImdlp::transform(samples& X_)
{
indices_t indices_transform = sortIndices(X);
indices_t indices_transform = sortIndices(X_);
applyCutPoints();
return xDiscretized;
}
void CPPFImdlp::debugPoints(samples& X, labels& y)
void CPPFImdlp::debugPoints(samples& X_, labels& y_)
{
std::cout << "+++++++++++++++++++++++" << std::endl;
// for (auto i : sortIndices(X))
indices_t indices = sortIndices(X);
for (size_t i = 0; i < indices.size(); i++) {
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
indices_t indices_n = sortIndices(X);
for (size_t i = 0; i < indices_n.size(); i++) {
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices_n[i], X_[indices_n[i]], y_[indices_n[i]]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
fit(X, y);
fit(X_, y_);
for (auto item : cutPoints) {
std::cout << item.start << " X[" << item.end << "]=" << X[item.end] << std::endl;
std::cout << item.start << " X_[" << item.end << "]=" << X_[item.end] << std::endl;
}
}
void CPPFImdlp::applyCutPoints()
@@ -81,7 +89,7 @@ namespace mdlp {
int k, k1, k2;
float ig, delta;
float ent, ent1, ent2;
float N = float(rest.end - rest.start);
auto N = float(rest.end - rest.start);
if (N < 2) {
return false;
}
@@ -92,7 +100,7 @@ namespace mdlp {
ent1 = Metrics::entropy(y, indices, rest.start, candidate.end, numClasses);
ent2 = Metrics::entropy(y, indices, candidate.end, rest.end, numClasses);
ig = Metrics::informationGain(y, indices, rest.start, rest.end, candidate.end, numClasses);
delta = log2(pow(3, k) - 2) - (k * ent - k1 * ent1 - k2 * ent2);
delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2);
float term = 1 / N * (log2(N - 1) + delta);
if (debug) {
std::cout << "Rest: " << rest;
@@ -116,7 +124,8 @@ namespace mdlp {
bool lastReject = false, first = true;
for (auto item : cutPoints) {
if (evaluateCutPoint(rest, item)) {
std::cout << "Accepted" << std::endl;
if (debug)
std::cout << "Accepted" << std::endl;
if (lastReject) {
if (first) {
item.fromValue = std::numeric_limits<float>::lowest();
@@ -132,13 +141,14 @@ namespace mdlp {
first = false;
rest.start = item.end;
} else {
std::cout << "Rejected" << std::endl;
if (debug)
std::cout << "Rejected" << std::endl;
lastReject = true;
}
}
if (!first) {
filtered.back().toValue = std::numeric_limits<float>::max();
filtered.back().end = X.size();
filtered.back().end = X.size() - 1;
} else {
filtered.push_back(rest);
}
@@ -149,15 +159,13 @@ namespace mdlp {
{
cutPoints_t cutPts;
cutPoint_t cutPoint;
indices_t cutIdx;
float xPrev, xCur, xPivot;
int yPrev, yCur, yPivot;
size_t idxPrev, idxPivot, idx, numElements, start;
size_t idx, numElements, start;
xCur = xPrev = X[indices[0]];
yCur = yPrev = y[indices[0]];
numElements = indices.size() - 1;
idxPrev = indices[0];
idx = start = 0;
bool firstCutPoint = true;
if (debug)
@@ -165,7 +173,6 @@ namespace mdlp {
while (idx < numElements) {
xPivot = xCur;
yPivot = yCur;
idxPivot = indices[idx];
if (debug)
printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
// Read the same values and check class changes
@@ -192,30 +199,29 @@ namespace mdlp {
printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
}
cutPts.push_back(cutPoint);
cutIdx.push_back(idxPrev);
}
yPrev = yPivot;
xPrev = xPivot;
idxPrev = indices[idxPivot];
}
if (idx == numElements) {
cutPoint.start = start;
cutPoint.end = numElements;
cutPoint.end = numElements + 1;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = std::numeric_limits<float>::max();
cutPoint.classNumber = -1;
if (debug)
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
cutPts.push_back(cutPoint);
cutIdx.push_back(idxPrev);
}
for (auto cutPt : cutPts)
std::cout << "Cut point: " << cutPt;
cutPoints = cutPts;
}
void CPPFImdlp::computeCutPointsOriginal()
{
cutPoints_t cutPts;
cutPoint_t cutPoint;
float xPrev = std::numeric_limits<float>::lowest();
float xPrev;
int yPrev;
bool first = true;
// idxPrev is the index of the init instance of the cutPoint
@@ -236,40 +242,34 @@ namespace mdlp {
cutPoint.end = index;
cutPoint.classNumber = -1;
cutPoint.toValue = round(divider * (X[idx] + xPrev) / 2) / divider;
if (debug) {
std::cout << "Cut point: " << cutPoint << " //";
std::cout << X[idx] << " -> " << y[idx] << " yPrev= "
<< yPrev << idxPrev << std::endl;
}
idxPrev = index;
cutPts.push_back(cutPoint);
}
xPrev = X[idx];
yPrev = y[idx];
}
std::cout << "Came to here" << first << std::endl;
if (first) {
cutPoint.start = 0;
cutPoint.classNumber = -1;
cutPoint.fromValue = std::numeric_limits<float>::lowest();
cutPoint.toValue = std::numeric_limits<float>::max();
cutPoints.push_back(cutPoint);
cutPts.push_back(cutPoint);
} else
cutPts.back().toValue = std::numeric_limits<float>::max();
cutPts.back().end = X.size();
if (debug)
for (auto cutPoint : cutPts)
std::cout << "Cut point: " << cutPoint << std::endl;
for (auto cutPt : cutPts)
std::cout << "-Cut point: " << cutPt;
cutPoints = cutPts;
}
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices(samples& X)
indices_t CPPFImdlp::sortIndices(samples& X_)
{
indices_t idx(X.size());
indices_t idx(X_.size());
std::iota(idx.begin(), idx.end(), 0);
for (std::size_t i = 0; i < X.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
{ return X[i1] < X[i2]; });
for (std::size_t i = 0; i < X_.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
{ return X_[i1] < X_[i2]; });
return idx;
}
}

View File

@@ -17,7 +17,7 @@ namespace mdlp {
cutPoints_t cutPoints;
protected:
indices_t sortIndices(samples&);
static indices_t sortIndices(samples&);
void computeCutPointsOriginal();
void computeCutPointsProposed();
bool evaluateCutPoint(cutPoint_t, cutPoint_t);

View File

@@ -2,8 +2,7 @@
#include <set>
namespace mdlp {
Metrics::Metrics()
{
}
= default;
int Metrics::numClasses(labels& y, indices_t indices, size_t start, size_t end)
{
std::set<int> numClasses;
@@ -31,7 +30,7 @@ namespace mdlp {
}
float Metrics::informationGain(labels& y, indices_t& indices, size_t start, size_t end, size_t cutPoint, int nClasses)
{
float iGain = 0.0;
float iGain;
float entropy, entropyLeft, entropyRight;
int nClassesLeft, nClassesRight;
int nElementsLeft = cutPoint - start, nElementsRight = end - cutPoint;

Binary file not shown.

View File

@@ -5,6 +5,18 @@ project(FImdlp)
set(CMAKE_CXX_STANDARD 14)
include(FetchContent)
# No manual include_directories() for GoogleTest here.
# FetchContent makes the GTest::gtest_main target available, and a test target
# that links against it inherits the googletest/googlemock include directories.
# Hard-coded absolute paths into one developer's home directory would only
# work on that machine and only after a previous configure run had populated
# build/_deps.
# NOTE(review): the Python framework directory that used to be listed here was
# dropped as well; nothing visible in the unit tests needs it - confirm.
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
@@ -24,4 +36,3 @@ target_link_libraries(FImdlp_unittest GTest::gtest_main)
include(GoogleTest)
gtest_discover_tests(Metrics_unittest)
gtest_discover_tests(FImdlp_unittest)

View File

@@ -4,24 +4,23 @@
namespace mdlp {
class TestMetrics : public CPPFImdlp, public testing::Test {
public:
//TestMetrics(samples X, labels y, indices_t indices) : X(X), y(y), indices(indices), CPPFImdlp(true) {}
TestMetrics() : CPPFImdlp(true, 6, false) {}
indices_t indices; // sorted indices to use with X and y
samples X;
labels y;
samples xDiscretized;
int numClasses;
float precision_test = 0.000001;
void SetUp() override
void SetUp()
{
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
numClasses = 2;
fit(X, y);
}
void check_sorted_vector(samples& X, indices_t indices)
void check_sorted_vector(samples& X_, indices_t indices_)
{
this->X = X;
this->indices = indices;
X = X_;
indices = indices_;
indices_t testSortedIndices = sortIndices(X);
float prev = X[testSortedIndices[0]];
for (auto i = 0; i < X.size(); ++i) {
@@ -30,21 +29,11 @@ namespace mdlp {
prev = X[testSortedIndices[i]];
}
}
std::vector<cutPoint_t> testCutPoints(samples& X, indices_t& indices, labels& y)
{
this->X = X;
this->y = y;
this->indices = indices;
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
//computeCutPoints();
return getCutPoints();
}
};
//
TEST_F(TestMetrics, SortIndices)
{
samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
check_sorted_vector(X, indices);
X = { 5.77, 5.88, 5.99 };
@@ -54,23 +43,97 @@ namespace mdlp {
indices = { 2, 1, 0 };
check_sorted_vector(X, indices);
}
// TEST_F(TestMetrics, EvaluateCutPoint)
// {
// cutPoint_t rest, candidate;
// rest.start = 0;
// rest.end = 10;
// candidate.start = 0;
// candidate.end = 5;
// float computed = evaluateCutPoint(rest, candidate);
// ASSERT_NEAR(0.468996, computed, precision_test);
// }
// evaluateCutPoint() must reject the candidate interval [0, 4) when it is
// evaluated against the remaining interval [0, 10) of the fixture data.
TEST_F(TestMetrics, EvaluateCutPoint)
{
    // Interval still to be split: the ten samples, loosely bounded in value.
    cutPoint_t rest;
    rest.classNumber = -1;
    rest.start = 0;
    rest.end = 10;
    rest.fromValue = -1;
    rest.toValue = 1000;

    // Proposed cut: the first four sorted samples, up to the value 5.15.
    cutPoint_t candidate;
    candidate.classNumber = -1;
    candidate.start = 0;
    candidate.end = 4;
    candidate.fromValue = -1;
    candidate.toValue = 5.15;

    EXPECT_FALSE(evaluateCutPoint(rest, candidate));
}
TEST_F(TestMetrics, ComputeCutPointsOriginal)
{
std::vector<cutPoint_t> computed, expected;
cutPoints_t computed, expected;
expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
{ 6, 7, -1, 5.45, 5.65 }, { 7, 10, -1, 5.65, 3.4028234663852886e+38 }
};
computeCutPointsOriginal();
computed = getCutPoints();
for (auto cut : computed) {
std::cout << cut.classNumber << " -> (" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << ")" << std::endl;
EXPECT_EQ(computed.size(), 4);
for (auto i = 0; i < 4; i++) {
EXPECT_EQ(computed[i].start, expected[i].start);
EXPECT_EQ(computed[i].end, expected[i].end);
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
}
}
// Runs the original cut-point algorithm on a small four-sample input and
// expects a single cut point spanning indices [0, 4) and the whole float range.
TEST_F(TestMetrics, ComputeCutPointsOriginalGCase)
{
    cutPoints_t computed, expected;
    expected = {
        { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
    };
    X = {0, 1, 2, 2};
    y = {1, 1, 1, 2};
    fit(X, y);
    computeCutPointsOriginal();
    computed = getCutPoints();
    // ASSERT (not EXPECT): a size mismatch has to stop the test here, otherwise
    // the loop below would index past the end of `computed`.
    ASSERT_EQ(computed.size(), expected.size());
    for (size_t i = 0; i < expected.size(); i++) {
        EXPECT_EQ(computed[i].start, expected[i].start);
        EXPECT_EQ(computed[i].end, expected[i].end);
        EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
        EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
        EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
    }
}
// Runs the proposed cut-point algorithm on the fixture data and compares every
// resulting cut point (index range, class number and value range) with the
// expected list.
TEST_F(TestMetrics, ComputeCutPointsProposed)
{
    cutPoints_t computed, expected;
    expected = {
        { 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 },
        { 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 },
        { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
    };
    computeCutPointsProposed();
    computed = getCutPoints();
    // ASSERT (not EXPECT): a size mismatch has to stop the test here, otherwise
    // the loop below would index past the end of `computed`.
    ASSERT_EQ(computed.size(), expected.size());
    for (size_t i = 0; i < expected.size(); i++) {
        EXPECT_EQ(computed[i].start, expected[i].start);
        EXPECT_EQ(computed[i].end, expected[i].end);
        EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
        EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
        EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
    }
}
// Runs the proposed cut-point algorithm on a small four-sample input and
// expects two cut points. Both of them are compared with the expected list.
TEST_F(TestMetrics, ComputeCutPointsProposedGCase)
{
    cutPoints_t computed, expected;
    expected = {
        { 0, 2, -1, -3.4028234663852886e+38, 1.5 },
        { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
    };
    X = {0, 1, 2, 2};
    y = {1, 1, 1, 2};
    fit(X, y);
    computeCutPointsProposed();
    computed = getCutPoints();
    // ASSERT (not EXPECT): a size mismatch has to stop the test here, otherwise
    // the loop below would index past the end of `computed`.
    ASSERT_EQ(computed.size(), expected.size());
    // Iterate over the whole expected list so the second cut point is checked too.
    for (size_t i = 0; i < expected.size(); i++) {
        EXPECT_EQ(computed[i].start, expected[i].start);
        EXPECT_EQ(computed[i].end, expected[i].end);
        EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
        EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
        EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
    }
}
}

View File

@@ -61,32 +61,34 @@ data = load_iris()
X = data.data
y = data.target
features = data.feature_names
test = FImdlp()
test.fit(X, y, features=features)
test.transform(X)
# test = FImdlp()
# test.fit(X, y, features=features)
# test.transform(X)
# test.get_cut_points()
# test = CFImdlp(debug=False)
test = CFImdlp(debug=True, proposed=False)
# # k = test.cut_points(X[:, 0], y)
# # print(k)
# # k = test.cut_points_ant(X[:, 0], y)
# # print(k)
# # test.debug_points(X[:, 0], y)
# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# # To check
# indices2 = np.argsort(X)
# Xs = np.array(X)[indices2]
# ys = np.array(y)[indices2]
# # test.fit(X[:, 0], y)
# test.fit(X, y)
# result = test.get_cut_points()
# for item in result:
# print(
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
# )
# print(test.get_discretized_values())
# test.fit(X[:, 0], y)
test.fit(X, y)
result = test.get_cut_points()
for item in result:
print(
f"*Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
)
print(test.get_discretized_values())
# print(Xs, ys)
# print("**********************")