mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 08:25:51 +00:00
refactor tests
This commit is contained in:
1
Makefile
1
Makefile
@@ -11,6 +11,7 @@ test:
|
|||||||
cd fimdlp/testcpp && ./test
|
cd fimdlp/testcpp && ./test
|
||||||
|
|
||||||
coverage:
|
coverage:
|
||||||
|
make test
|
||||||
cd fimdlp/testcpp && ./cover
|
cd fimdlp/testcpp && ./cover
|
||||||
|
|
||||||
lint: ## Lint and static-check
|
lint: ## Lint and static-check
|
||||||
|
@@ -3,12 +3,13 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
std::ostream& operator << (std::ostream& os, const cutPoint_t& cut)
|
ostream& operator << (ostream& os, const cutPoint_t& cut)
|
||||||
{
|
{
|
||||||
os << cut.classNumber << " -> (" << cut.start << ", " << cut.end <<
|
os << cut.classNumber << " -> (" << cut.start << ", " << cut.end <<
|
||||||
") - (" << cut.fromValue << ", " << cut.toValue << ") "
|
") - (" << cut.fromValue << ", " << cut.toValue << ") "
|
||||||
<< std::endl;
|
<< endl;
|
||||||
return os;
|
return os;
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -27,7 +28,7 @@ namespace mdlp {
|
|||||||
samples CPPFImdlp::getCutPoints()
|
samples CPPFImdlp::getCutPoints()
|
||||||
{
|
{
|
||||||
samples output(cutPoints.size());
|
samples output(cutPoints.size());
|
||||||
std::transform(cutPoints.begin(), cutPoints.end(), output.begin(),
|
::transform(cutPoints.begin(), cutPoints.end(), output.begin(),
|
||||||
[](cutPoint_t cut) { return cut.toValue; });
|
[](cutPoint_t cut) { return cut.toValue; });
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
@@ -40,11 +41,11 @@ namespace mdlp {
|
|||||||
X = X_;
|
X = X_;
|
||||||
y = y_;
|
y = y_;
|
||||||
if (X.size() != y.size()) {
|
if (X.size() != y.size()) {
|
||||||
std::cerr << "X and y must have the same size" << std::endl;
|
cerr << "X and y must have the same size" << endl;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
if (X.size() == 0) {
|
if (X.size() == 0) {
|
||||||
std::cerr << "X and y must have at least one element" << std::endl;
|
cerr << "X and y must have at least one element" << endl;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
this->indices = sortIndices(X_);
|
this->indices = sortIndices(X_);
|
||||||
@@ -84,10 +85,10 @@ namespace mdlp {
|
|||||||
delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2);
|
delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2);
|
||||||
float term = 1 / N * (log2(N - 1) + delta);
|
float term = 1 / N * (log2(N - 1) + delta);
|
||||||
if (debug) {
|
if (debug) {
|
||||||
std::cout << "Rest: " << rest;
|
cout << "Rest: " << rest;
|
||||||
std::cout << "Candidate: " << candidate;
|
cout << "Candidate: " << candidate;
|
||||||
std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl;
|
cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << endl;
|
||||||
std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl;
|
cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << endl;
|
||||||
}
|
}
|
||||||
return (ig > term);
|
return (ig > term);
|
||||||
}
|
}
|
||||||
@@ -99,15 +100,15 @@ namespace mdlp {
|
|||||||
|
|
||||||
rest.start = 0;
|
rest.start = 0;
|
||||||
rest.end = X.size();
|
rest.end = X.size();
|
||||||
rest.fromValue = std::numeric_limits<float>::lowest();
|
rest.fromValue = numeric_limits<float>::lowest();
|
||||||
rest.toValue = std::numeric_limits<float>::max();
|
rest.toValue = numeric_limits<float>::max();
|
||||||
rest.classNumber = classNumber;
|
rest.classNumber = classNumber;
|
||||||
bool first = true;
|
bool first = true;
|
||||||
for (size_t index = 0; index < size_t(cutPoints.size()); index++) {
|
for (size_t index = 0; index < size_t(cutPoints.size()); index++) {
|
||||||
item = cutPoints[index];
|
item = cutPoints[index];
|
||||||
if (evaluateCutPoint(rest, item)) {
|
if (evaluateCutPoint(rest, item)) {
|
||||||
if (debug)
|
if (debug)
|
||||||
std::cout << "Accepted: " << item << std::endl;
|
cout << "Accepted: " << item << endl;
|
||||||
//Assign class number to the interval (cutpoint)
|
//Assign class number to the interval (cutpoint)
|
||||||
item.classNumber = classNumber++;
|
item.classNumber = classNumber++;
|
||||||
filtered.push_back(item);
|
filtered.push_back(item);
|
||||||
@@ -115,11 +116,11 @@ namespace mdlp {
|
|||||||
rest.start = item.end;
|
rest.start = item.end;
|
||||||
} else {
|
} else {
|
||||||
if (debug)
|
if (debug)
|
||||||
std::cout << "Rejected: " << item << std::endl;
|
cout << "Rejected: " << item << endl;
|
||||||
if (index != size_t(cutPoints.size()) - 1) {
|
if (index != size_t(cutPoints.size()) - 1) {
|
||||||
// Try to merge the rejected cutpoint with the next one
|
// Try to merge the rejected cutpoint with the next one
|
||||||
if (first) {
|
if (first) {
|
||||||
cutPoints[index + 1].fromValue = std::numeric_limits<float>::lowest();
|
cutPoints[index + 1].fromValue = numeric_limits<float>::lowest();
|
||||||
cutPoints[index + 1].start = indices[0];
|
cutPoints[index + 1].start = indices[0];
|
||||||
} else {
|
} else {
|
||||||
cutPoints[index + 1].fromValue = item.fromValue;
|
cutPoints[index + 1].fromValue = item.fromValue;
|
||||||
@@ -129,7 +130,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!first) {
|
if (!first) {
|
||||||
filtered.back().toValue = std::numeric_limits<float>::max();
|
filtered.back().toValue = numeric_limits<float>::max();
|
||||||
filtered.back().end = X.size() - 1;
|
filtered.back().end = X.size() - 1;
|
||||||
} else {
|
} else {
|
||||||
filtered.push_back(rest);
|
filtered.push_back(rest);
|
||||||
@@ -175,7 +176,7 @@ namespace mdlp {
|
|||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = idx;
|
cutPoint.end = idx;
|
||||||
start = idx;
|
start = idx;
|
||||||
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
cutPoint.fromValue = firstCutPoint ? numeric_limits<float>::lowest() : cutPts.back().toValue;
|
||||||
cutPoint.toValue = (xPrev + xCur) / 2;
|
cutPoint.toValue = (xPrev + xCur) / 2;
|
||||||
cutPoint.classNumber = -1;
|
cutPoint.classNumber = -1;
|
||||||
firstCutPoint = false;
|
firstCutPoint = false;
|
||||||
@@ -190,17 +191,17 @@ namespace mdlp {
|
|||||||
if (idx == numElements) {
|
if (idx == numElements) {
|
||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = numElements + 1;
|
cutPoint.end = numElements + 1;
|
||||||
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
cutPoint.fromValue = firstCutPoint ? numeric_limits<float>::lowest() : cutPts.back().toValue;
|
||||||
cutPoint.toValue = std::numeric_limits<float>::max();
|
cutPoint.toValue = numeric_limits<float>::max();
|
||||||
cutPoint.classNumber = -1;
|
cutPoint.classNumber = -1;
|
||||||
if (debug)
|
if (debug)
|
||||||
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
}
|
}
|
||||||
if (debug) {
|
if (debug) {
|
||||||
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl;
|
cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << endl;
|
||||||
for (auto cutPt : cutPts)
|
for (auto cutPt : cutPts)
|
||||||
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposal: Cut point: " << cutPt;
|
cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposal: Cut point: " << cutPt;
|
||||||
}
|
}
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
@@ -224,7 +225,7 @@ namespace mdlp {
|
|||||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||||
if (first) {
|
if (first) {
|
||||||
first = false;
|
first = false;
|
||||||
cutPoint.fromValue = std::numeric_limits<float>::lowest();
|
cutPoint.fromValue = numeric_limits<float>::lowest();
|
||||||
} else {
|
} else {
|
||||||
cutPoint.fromValue = cutPts.back().toValue;
|
cutPoint.fromValue = cutPts.back().toValue;
|
||||||
}
|
}
|
||||||
@@ -241,16 +242,16 @@ namespace mdlp {
|
|||||||
if (first) {
|
if (first) {
|
||||||
cutPoint.start = 0;
|
cutPoint.start = 0;
|
||||||
cutPoint.classNumber = -1;
|
cutPoint.classNumber = -1;
|
||||||
cutPoint.fromValue = std::numeric_limits<float>::lowest();
|
cutPoint.fromValue = numeric_limits<float>::lowest();
|
||||||
cutPoint.toValue = std::numeric_limits<float>::max();
|
cutPoint.toValue = numeric_limits<float>::max();
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
} else
|
} else
|
||||||
cutPts.back().toValue = std::numeric_limits<float>::max();
|
cutPts.back().toValue = numeric_limits<float>::max();
|
||||||
cutPts.back().end = X.size();
|
cutPts.back().end = X.size();
|
||||||
if (debug) {
|
if (debug) {
|
||||||
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl;
|
cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << endl;
|
||||||
for (auto cutPt : cutPts)
|
for (auto cutPt : cutPts)
|
||||||
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
|
cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
|
||||||
}
|
}
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
@@ -258,8 +259,8 @@ namespace mdlp {
|
|||||||
indices_t CPPFImdlp::sortIndices(samples& X_)
|
indices_t CPPFImdlp::sortIndices(samples& X_)
|
||||||
{
|
{
|
||||||
indices_t idx(X_.size());
|
indices_t idx(X_.size());
|
||||||
std::iota(idx.begin(), idx.end(), 0);
|
iota(idx.begin(), idx.end(), 0);
|
||||||
for (std::size_t i = 0; i < X_.size(); i++)
|
for (size_t i = 0; i < X_.size(); i++)
|
||||||
stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
|
stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
|
||||||
{ return X_[i1] < X_[i2]; });
|
{ return X_[i1] < X_[i2]; });
|
||||||
return idx;
|
return idx;
|
||||||
|
Binary file not shown.
@@ -49,6 +49,14 @@ namespace mdlp {
|
|||||||
EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
|
EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
template<typename T, typename A>
|
||||||
|
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||||
|
{
|
||||||
|
EXPECT_EQ(expected.size(), computed.size());
|
||||||
|
for (auto i = 0; i < expected.size(); i++) {
|
||||||
|
EXPECT_EQ(expected[i], computed[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
TEST_F(TestFImdlp, SortIndices)
|
TEST_F(TestFImdlp, SortIndices)
|
||||||
@@ -72,7 +80,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||||
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||||
@@ -83,7 +91,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||||
};
|
};
|
||||||
@@ -95,7 +103,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsProposal)
|
TEST_F(TestFImdlp, ComputeCutPointsProposal)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
||||||
{ 6, 9, -1, 5.4, 5.85 },
|
{ 6, 9, -1, 5.4, 5.85 },
|
||||||
@@ -106,7 +114,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||||
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||||
@@ -117,4 +125,17 @@ namespace mdlp {
|
|||||||
computeCutPointsProposal();
|
computeCutPointsProposal();
|
||||||
checkCutPoints(expected);
|
checkCutPoints(expected);
|
||||||
}
|
}
|
||||||
|
TEST_F(TestFImdlp, DiscretizedValues)
|
||||||
|
{
|
||||||
|
labels computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
computed = getDiscretizedValues();
|
||||||
|
checkVectors(expected, computed);
|
||||||
|
}
|
||||||
|
TEST_F(TestFImdlp, GetCutPoints)
|
||||||
|
{
|
||||||
|
samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 };
|
||||||
|
computeCutPointsOriginal();
|
||||||
|
computed = getCutPoints();
|
||||||
|
checkVectors(expected, computed);
|
||||||
|
}
|
||||||
}
|
}
|
@@ -1,6 +1,8 @@
|
|||||||
#ifndef TYPES_H
|
#ifndef TYPES_H
|
||||||
#define TYPES_H
|
#define TYPES_H
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
struct CutPointBody {
|
struct CutPointBody {
|
||||||
size_t start, end; // indices of the sorted vector
|
size_t start, end; // indices of the sorted vector
|
||||||
@@ -8,9 +10,9 @@ namespace mdlp {
|
|||||||
float fromValue, toValue;
|
float fromValue, toValue;
|
||||||
};
|
};
|
||||||
typedef CutPointBody cutPoint_t;
|
typedef CutPointBody cutPoint_t;
|
||||||
typedef std::vector<float> samples;
|
typedef vector<float> samples;
|
||||||
typedef std::vector<int> labels;
|
typedef vector<int> labels;
|
||||||
typedef std::vector<size_t> indices_t;
|
typedef vector<size_t> indices_t;
|
||||||
typedef std::vector<cutPoint_t> cutPoints_t;
|
typedef vector<cutPoint_t> cutPoints_t;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
Reference in New Issue
Block a user