refactor tests

This commit is contained in:
2022-12-04 17:28:05 +01:00
parent 31c9b8a3a3
commit cd3df9f5fc
5 changed files with 61 additions and 36 deletions

View File

@@ -11,6 +11,7 @@ test:
cd fimdlp/testcpp && ./test cd fimdlp/testcpp && ./test
coverage: coverage:
make test
cd fimdlp/testcpp && ./cover cd fimdlp/testcpp && ./cover
lint: ## Lint and static-check lint: ## Lint and static-check

View File

@@ -3,12 +3,13 @@
#include <iostream> #include <iostream>
#include <algorithm> #include <algorithm>
#include "Metrics.h" #include "Metrics.h"
namespace mdlp { namespace mdlp {
std::ostream& operator << (std::ostream& os, const cutPoint_t& cut) ostream& operator << (ostream& os, const cutPoint_t& cut)
{ {
os << cut.classNumber << " -> (" << cut.start << ", " << cut.end << os << cut.classNumber << " -> (" << cut.start << ", " << cut.end <<
") - (" << cut.fromValue << ", " << cut.toValue << ") " ") - (" << cut.fromValue << ", " << cut.toValue << ") "
<< std::endl; << endl;
return os; return os;
} }
@@ -27,7 +28,7 @@ namespace mdlp {
samples CPPFImdlp::getCutPoints() samples CPPFImdlp::getCutPoints()
{ {
samples output(cutPoints.size()); samples output(cutPoints.size());
std::transform(cutPoints.begin(), cutPoints.end(), output.begin(), ::transform(cutPoints.begin(), cutPoints.end(), output.begin(),
[](cutPoint_t cut) { return cut.toValue; }); [](cutPoint_t cut) { return cut.toValue; });
return output; return output;
} }
@@ -40,11 +41,11 @@ namespace mdlp {
X = X_; X = X_;
y = y_; y = y_;
if (X.size() != y.size()) { if (X.size() != y.size()) {
std::cerr << "X and y must have the same size" << std::endl; cerr << "X and y must have the same size" << endl;
return *this; return *this;
} }
if (X.size() == 0) { if (X.size() == 0) {
std::cerr << "X and y must have at least one element" << std::endl; cerr << "X and y must have at least one element" << endl;
return *this; return *this;
} }
this->indices = sortIndices(X_); this->indices = sortIndices(X_);
@@ -84,10 +85,10 @@ namespace mdlp {
delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2); delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2);
float term = 1 / N * (log2(N - 1) + delta); float term = 1 / N * (log2(N - 1) + delta);
if (debug) { if (debug) {
std::cout << "Rest: " << rest; cout << "Rest: " << rest;
std::cout << "Candidate: " << candidate; cout << "Candidate: " << candidate;
std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl; cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << endl;
std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl; cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << endl;
} }
return (ig > term); return (ig > term);
} }
@@ -99,15 +100,15 @@ namespace mdlp {
rest.start = 0; rest.start = 0;
rest.end = X.size(); rest.end = X.size();
rest.fromValue = std::numeric_limits<float>::lowest(); rest.fromValue = numeric_limits<float>::lowest();
rest.toValue = std::numeric_limits<float>::max(); rest.toValue = numeric_limits<float>::max();
rest.classNumber = classNumber; rest.classNumber = classNumber;
bool first = true; bool first = true;
for (size_t index = 0; index < size_t(cutPoints.size()); index++) { for (size_t index = 0; index < size_t(cutPoints.size()); index++) {
item = cutPoints[index]; item = cutPoints[index];
if (evaluateCutPoint(rest, item)) { if (evaluateCutPoint(rest, item)) {
if (debug) if (debug)
std::cout << "Accepted: " << item << std::endl; cout << "Accepted: " << item << endl;
//Assign class number to the interval (cutpoint) //Assign class number to the interval (cutpoint)
item.classNumber = classNumber++; item.classNumber = classNumber++;
filtered.push_back(item); filtered.push_back(item);
@@ -115,11 +116,11 @@ namespace mdlp {
rest.start = item.end; rest.start = item.end;
} else { } else {
if (debug) if (debug)
std::cout << "Rejected: " << item << std::endl; cout << "Rejected: " << item << endl;
if (index != size_t(cutPoints.size()) - 1) { if (index != size_t(cutPoints.size()) - 1) {
// Try to merge the rejected cutpoint with the next one // Try to merge the rejected cutpoint with the next one
if (first) { if (first) {
cutPoints[index + 1].fromValue = std::numeric_limits<float>::lowest(); cutPoints[index + 1].fromValue = numeric_limits<float>::lowest();
cutPoints[index + 1].start = indices[0]; cutPoints[index + 1].start = indices[0];
} else { } else {
cutPoints[index + 1].fromValue = item.fromValue; cutPoints[index + 1].fromValue = item.fromValue;
@@ -129,7 +130,7 @@ namespace mdlp {
} }
} }
if (!first) { if (!first) {
filtered.back().toValue = std::numeric_limits<float>::max(); filtered.back().toValue = numeric_limits<float>::max();
filtered.back().end = X.size() - 1; filtered.back().end = X.size() - 1;
} else { } else {
filtered.push_back(rest); filtered.push_back(rest);
@@ -175,7 +176,7 @@ namespace mdlp {
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = idx; cutPoint.end = idx;
start = idx; start = idx;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue; cutPoint.fromValue = firstCutPoint ? numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = (xPrev + xCur) / 2; cutPoint.toValue = (xPrev + xCur) / 2;
cutPoint.classNumber = -1; cutPoint.classNumber = -1;
firstCutPoint = false; firstCutPoint = false;
@@ -190,17 +191,17 @@ namespace mdlp {
if (idx == numElements) { if (idx == numElements) {
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = numElements + 1; cutPoint.end = numElements + 1;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue; cutPoint.fromValue = firstCutPoint ? numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = std::numeric_limits<float>::max(); cutPoint.toValue = numeric_limits<float>::max();
cutPoint.classNumber = -1; cutPoint.classNumber = -1;
if (debug) if (debug)
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
} }
if (debug) { if (debug) {
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl; cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << endl;
for (auto cutPt : cutPts) for (auto cutPt : cutPts)
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposal: Cut point: " << cutPt; cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposal: Cut point: " << cutPt;
} }
cutPoints = cutPts; cutPoints = cutPts;
} }
@@ -224,7 +225,7 @@ namespace mdlp {
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point } // if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
if (first) { if (first) {
first = false; first = false;
cutPoint.fromValue = std::numeric_limits<float>::lowest(); cutPoint.fromValue = numeric_limits<float>::lowest();
} else { } else {
cutPoint.fromValue = cutPts.back().toValue; cutPoint.fromValue = cutPts.back().toValue;
} }
@@ -241,16 +242,16 @@ namespace mdlp {
if (first) { if (first) {
cutPoint.start = 0; cutPoint.start = 0;
cutPoint.classNumber = -1; cutPoint.classNumber = -1;
cutPoint.fromValue = std::numeric_limits<float>::lowest(); cutPoint.fromValue = numeric_limits<float>::lowest();
cutPoint.toValue = std::numeric_limits<float>::max(); cutPoint.toValue = numeric_limits<float>::max();
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
} else } else
cutPts.back().toValue = std::numeric_limits<float>::max(); cutPts.back().toValue = numeric_limits<float>::max();
cutPts.back().end = X.size(); cutPts.back().end = X.size();
if (debug) { if (debug) {
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl; cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << endl;
for (auto cutPt : cutPts) for (auto cutPt : cutPts)
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt; cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
} }
cutPoints = cutPts; cutPoints = cutPts;
} }
@@ -258,8 +259,8 @@ namespace mdlp {
indices_t CPPFImdlp::sortIndices(samples& X_) indices_t CPPFImdlp::sortIndices(samples& X_)
{ {
indices_t idx(X_.size()); indices_t idx(X_.size());
std::iota(idx.begin(), idx.end(), 0); iota(idx.begin(), idx.end(), 0);
for (std::size_t i = 0; i < X_.size(); i++) for (size_t i = 0; i < X_.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2) stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
{ return X_[i1] < X_[i2]; }); { return X_[i1] < X_[i2]; });
return idx; return idx;

View File

@@ -49,6 +49,14 @@ namespace mdlp {
EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision); EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
} }
} }
// Compares two vectors with non-fatal gtest expectations: first their sizes,
// then their elements position by position.
//
// expected: the reference values.
// computed: the values produced by the code under test.
//
// EXPECT_EQ records a failure but lets the test continue, so after a size
// mismatch only the indices present in BOTH vectors are compared; a shorter
// `computed` (or `expected`) is never indexed out of bounds.
template<typename T, typename A>
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
{
    // Use the container's own unsigned index type to avoid a signed/unsigned comparison.
    using size_type = typename std::vector<T, A>::size_type;
    EXPECT_EQ(expected.size(), computed.size());
    const size_type common = expected.size() < computed.size() ? expected.size() : computed.size();
    for (size_type i = 0; i < common; i++) {
        EXPECT_EQ(expected[i], computed[i]);
    }
}
}; };
TEST_F(TestFImdlp, SortIndices) TEST_F(TestFImdlp, SortIndices)
@@ -72,7 +80,7 @@ namespace mdlp {
} }
TEST_F(TestFImdlp, ComputeCutPointsOriginal) TEST_F(TestFImdlp, ComputeCutPointsOriginal)
{ {
cutPoints_t computed, expected; cutPoints_t expected;
expected = { expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 }, { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 } { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
@@ -83,7 +91,7 @@ namespace mdlp {
} }
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
{ {
cutPoints_t computed, expected; cutPoints_t expected;
expected = { expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 }, { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
}; };
@@ -95,7 +103,7 @@ namespace mdlp {
} }
TEST_F(TestFImdlp, ComputeCutPointsProposal) TEST_F(TestFImdlp, ComputeCutPointsProposal)
{ {
cutPoints_t computed, expected; cutPoints_t expected;
expected = { expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 }, { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
{ 6, 9, -1, 5.4, 5.85 }, { 6, 9, -1, 5.4, 5.85 },
@@ -106,7 +114,7 @@ namespace mdlp {
} }
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
{ {
cutPoints_t computed, expected; cutPoints_t expected;
expected = { expected = {
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 }, { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 } { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
@@ -117,4 +125,17 @@ namespace mdlp {
computeCutPointsProposal(); computeCutPointsProposal();
checkCutPoints(expected); checkCutPoints(expected);
} }
// Verifies that getDiscretizedValues(), called on the fixture as-is,
// yields ten labels that are all zero.
TEST_F(TestFImdlp, DiscretizedValues)
{
    const labels allZeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    const labels result = getDiscretizedValues();
    checkVectors(allZeros, result);
}
// Verifies the values returned by getCutPoints() after running
// computeCutPointsOriginal() on the fixture data.
// NOTE(review): the last expected value presumably stands for
// numeric_limits<float>::max() (the open upper bound) - confirm.
// NOTE(review): checkVectors compares floats exactly, whereas checkCutPoints
// uses EXPECT_NEAR; confirm exact equality is intended here.
TEST_F(TestFImdlp, GetCutPoints)
{
    const samples wanted = { 5.15, 5.45, 3.4028234663852886e+38 };
    computeCutPointsOriginal();
    const samples obtained = getCutPoints();
    checkVectors(wanted, obtained);
}
} }

View File

@@ -1,6 +1,8 @@
#ifndef TYPES_H #ifndef TYPES_H
#define TYPES_H #define TYPES_H
#include <vector> #include <vector>
using namespace std;
namespace mdlp { namespace mdlp {
struct CutPointBody { struct CutPointBody {
size_t start, end; // indices of the sorted vector size_t start, end; // indices of the sorted vector
@@ -8,9 +10,9 @@ namespace mdlp {
float fromValue, toValue; float fromValue, toValue;
}; };
typedef CutPointBody cutPoint_t; typedef CutPointBody cutPoint_t;
typedef std::vector<float> samples; typedef vector<float> samples;
typedef std::vector<int> labels; typedef vector<int> labels;
typedef std::vector<size_t> indices_t; typedef vector<size_t> indices_t;
typedef std::vector<cutPoint_t> cutPoints_t; typedef vector<cutPoint_t> cutPoints_t;
} }
#endif #endif