Refactor tests

This commit is contained in:
2022-12-02 12:54:09 +01:00
parent 0686195854
commit 5657c1cd9f
14 changed files with 265 additions and 174 deletions

View File

@@ -2,16 +2,9 @@
#include <numeric> #include <numeric>
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <stdio.h>
#include <algorithm> #include <algorithm>
#include "Metrics.h" #include "Metrics.h"
namespace mdlp namespace mdlp {
{
std::ostream &operator<<(std::ostream &os, const CutPointBody &cut)
{
os << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << "]";
return os;
}
CPPFImdlp::CPPFImdlp() : debug(false), precision(6) CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
{ {
divider = pow(10, precision); divider = pow(10, precision);
@@ -23,70 +16,125 @@ namespace mdlp
CPPFImdlp::~CPPFImdlp() CPPFImdlp::~CPPFImdlp()
{ {
} }
std::vector<CutPointBody> CPPFImdlp::getCutPoints() std::vector<CutPoint_t> CPPFImdlp::getCutPoints()
{ {
return cutPoints; return cutPoints;
} }
std::vector<float> CPPFImdlp::getDiscretizedValues() labels CPPFImdlp::getDiscretizedValues()
{ {
return xDiscretized; return xDiscretized;
} }
void CPPFImdlp::fit(std::vector<float> &X, std::vector<int> &y) void CPPFImdlp::fit(samples& X, labels& y)
{ {
this->X = X; this->X = X;
this->y = y; this->y = y;
this->indices = sortIndices(X); this->indices = sortIndices(X);
this->xDiscretized = labels(X.size(), -1);
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
computeCutPoints(); computeCutPoints();
filterCutPoints(); filterCutPoints();
applyCutPoints(); applyCutPoints();
} }
std::vector<float> &CPPFImdlp::transform(std::vector<float> &X) labels& CPPFImdlp::transform(samples& X)
{ {
std::vector<size_t> indices_transform = sortIndices(X); indices_t indices_transform = sortIndices(X);
applyCutPoints(); applyCutPoints();
return xDiscretized; return xDiscretized;
} }
void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y) void CPPFImdlp::debugPoints(samples& X, labels& y)
{ {
std::cout << "+++++++++++++++++++++++" << std::endl; std::cout << "+++++++++++++++++++++++" << std::endl;
// for (auto i : sortIndices(X)) // for (auto i : sortIndices(X))
std::vector<size_t> indices = sortIndices(X); indices_t indices = sortIndices(X);
for (size_t i = 0; i < indices.size(); i++) for (size_t i = 0; i < indices.size(); i++) {
{
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]); printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
} }
std::cout << "+++++++++++++++++++++++" << std::endl; std::cout << "+++++++++++++++++++++++" << std::endl;
fit(X, y); fit(X, y);
for (auto item : cutPoints) for (auto item : cutPoints) {
{ std::cout << item.start << " X[" << item.end << "]=" << X[item.end] << std::endl;
std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl;
} }
} }
void CPPFImdlp::applyCutPoints() void CPPFImdlp::applyCutPoints()
{ {
for (auto cut : cutPoints) {
for (size_t i = cut.start; i < cut.end; i++) {
xDiscretized[indices[i]] = cut.classNumber;
}
}
} }
bool CPPFImdlp::evaluateCutPoint(CutPointBody point) bool CPPFImdlp::evaluateCutPoint(CutPoint_t rest, CutPoint_t candidate)
{ {
return true; int k, k1, k2;
float ig, delta;
float ent, ent1, ent2;
float N = float(rest.end - rest.start);
if (N < 2) {
return false;
}
k = Metrics::numClasses(y, indices, rest.start, rest.end);
k1 = Metrics::numClasses(y, indices, rest.start, candidate.end);
k2 = Metrics::numClasses(y, indices, candidate.end, rest.end);
ent = Metrics::entropy(y, indices, rest.start, rest.end, numClasses);
ent1 = Metrics::entropy(y, indices, rest.start, candidate.end, numClasses);
ent2 = Metrics::entropy(y, indices, candidate.end, rest.end, numClasses);
ig = Metrics::informationGain(y, indices, rest.start, rest.end, candidate.end, numClasses);
delta = log2(pow(3, k) - 2) - (k * ent - k1 * ent1 - k2 * ent2);
float term = 1 / N * (log2(N - 1) + delta);
std::cout << candidate
std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl;
std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl;
return (ig > term);
} }
void CPPFImdlp::filterCutPoints() void CPPFImdlp::filterCutPoints()
{ {
std::vector<CutPointBody> filtered; std::vector<CutPoint_t> filtered;
for (auto item : cutPoints) CutPoint_t rest;
{ int classNumber = 0;
if (evaluateCutPoint(item))
{ rest.start = 0;
rest.end = X.size();
rest.fromValue = std::numeric_limits<float>::lowest();
rest.toValue = std::numeric_limits<float>::max();
rest.classNumber = classNumber;
bool lastReject = false, first = true;
for (auto item : cutPoints) {
if (evaluateCutPoint(rest, item)) {
std::cout << "Accepted" << std::endl;
if (lastReject) {
if (first) {
item.fromValue = std::numeric_limits<float>::lowest();
item.start = indices[0];
} else {
item.fromValue = filtered.back().toValue;
item.start = filtered.back().end;
}
}
//Assign class number to the interval (cutpoint)
item.classNumber = classNumber++;
filtered.push_back(item); filtered.push_back(item);
first = false;
} else {
std::cout << "Rejected" << std::endl;
lastReject = true;
} }
} }
if (!first)
filtered.back().toValue = std::numeric_limits<float>::max();
else {
filtered.push_back(rest);
}
cutPoints = filtered; cutPoints = filtered;
} }
void CPPFImdlp::computeCutPoints() void CPPFImdlp::computeCutPoints()
{ {
std::vector<CutPointBody> cutPts; std::vector<CutPoint_t> cutPts;
CutPointBody cutPoint; CutPoint_t cutPoint;
std::vector<size_t> cutIdx; indices_t cutIdx;
float xPrev, xCur, xPivot; float xPrev, xCur, xPivot;
int yPrev, yCur, yPivot; int yPrev, yCur, yPivot;
size_t idxPrev, idxPivot, idx, numElements, start; size_t idxPrev, idxPivot, idx, numElements, start;
@@ -99,28 +147,25 @@ namespace mdlp
bool firstCutPoint = true; bool firstCutPoint = true;
if (debug) if (debug)
printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements); printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements);
while (idx < numElements) while (idx < numElements) {
{
xPivot = xCur; xPivot = xCur;
yPivot = yCur; yPivot = yCur;
idxPivot = indices[idx]; idxPivot = indices[idx];
if (debug) if (debug)
printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur); printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
// Read the same values and check class changes // Read the same values and check class changes
do do {
{
idx++; idx++;
xCur = X[indices[idx]]; xCur = X[indices[idx]];
yCur = y[indices[idx]]; yCur = y[indices[idx]];
if (yCur != yPivot && xCur == xPivot) if (yCur != yPivot && xCur == xPivot) {
{
yPivot = -1; yPivot = -1;
} }
if (debug) if (debug)
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur); printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
} while (idx < numElements && xCur == xPivot); }
if (yPivot == -1 || yPrev != yCur) while (idx < numElements && xCur == xPivot);
{ if (yPivot == -1 || yPrev != yCur) {
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = idx - 1; cutPoint.end = idx - 1;
start = idx; start = idx;
@@ -128,8 +173,7 @@ namespace mdlp
cutPoint.toValue = (xPrev + xCur) / 2; cutPoint.toValue = (xPrev + xCur) / 2;
cutPoint.classNumber = -1; cutPoint.classNumber = -1;
firstCutPoint = false; firstCutPoint = false;
if (debug) if (debug) {
{
printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
} }
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
@@ -139,8 +183,7 @@ namespace mdlp
xPrev = xPivot; xPrev = xPivot;
idxPrev = indices[idxPivot]; idxPrev = indices[idxPivot];
} }
if (idx == numElements) if (idx == numElements) {
{
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = numElements; cutPoint.end = numElements;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue; cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
@@ -155,31 +198,27 @@ namespace mdlp
} }
void CPPFImdlp::computeCutPointsAnt() void CPPFImdlp::computeCutPointsAnt()
{ {
std::vector<float> cutPts; samples cutPts;
std::vector<int> cutIdx; labels cutIdx;
float xPrev, cutPoint; float xPrev, cutPoint;
int yPrev; int yPrev;
size_t idxPrev; size_t idxPrev;
xPrev = X.at(indices[0]); xPrev = X.at(indices[0]);
yPrev = y.at(indices[0]); yPrev = y.at(indices[0]);
idxPrev = indices[0]; idxPrev = indices[0];
if (debug) if (debug) {
{
std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl; std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl;
} }
for (auto index = indices.begin(); index != indices.end(); ++index) for (auto index = indices.begin(); index != indices.end(); ++index) {
{
// Definition 2 Cut points are always on boundaries // Definition 2 Cut points are always on boundaries
if (y.at(*index) != yPrev && xPrev < X.at(*index)) if (y.at(*index) != yPrev && xPrev < X.at(*index)) {
{
cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider; cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
if (debug) if (debug) {
{
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev; std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" std::cout << "* (" << X.at(*index) << ", " << xPrev << ")="
<< ((X.at(*index) + xPrev) / 2) << "idxPrev" << ((X.at(*index) + xPrev) / 2) << "idxPrev"
<< idxPrev << std::endl; << idxPrev << std::endl;
} }
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
cutIdx.push_back(idxPrev); cutIdx.push_back(idxPrev);
@@ -191,13 +230,13 @@ namespace mdlp
// cutPoints = cutPts; // cutPoints = cutPts;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X) indices_t CPPFImdlp::sortIndices(samples& X)
{ {
std::vector<size_t> idx(X.size()); indices_t idx(X.size());
std::iota(idx.begin(), idx.end(), 0); std::iota(idx.begin(), idx.end(), 0);
for (std::size_t i = 0; i < X.size(); i++) for (std::size_t i = 0; i < X.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2) stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
{ return X[i1] < X[i2]; }); { return X[i1] < X[i2]; });
return idx; return idx;
} }
} }

View File

@@ -1,44 +1,37 @@
#ifndef CPPFIMDLP_H #ifndef CPPFIMDLP_H
#define CPPFIMDLP_H #define CPPFIMDLP_H
#include <vector> #include "typesFImdlp.h"
#include <utility> #include <utility>
namespace mdlp namespace mdlp {
{ class CPPFImdlp {
struct CutPointBody
{
size_t start, end; // indices of the sorted vector
int classNumber; // class assigned to the cut point
float fromValue, toValue; // Values of the variable
};
class CPPFImdlp
{
private: private:
bool debug; bool debug;
int precision; int precision;
float divider; float divider;
std::vector<size_t> indices; // sorted indices to use with X and y indices_t indices; // sorted indices to use with X and y
std::vector<float> X; samples X;
std::vector<int> y; labels y;
std::vector<float> xDiscretized; labels xDiscretized;
std::vector<CutPointBody> cutPoints; int numClasses;
std::vector<CutPoint_t> cutPoints;
protected: protected:
std::vector<size_t> sortIndices(std::vector<float> &); indices_t sortIndices(samples&);
bool evaluateCutPoint(CutPointBody);
void filterCutPoints();
void computeCutPoints();
void applyCutPoints();
void computeCutPointsAnt(); void computeCutPointsAnt();
void computeCutPoints();
bool evaluateCutPoint(CutPoint_t, CutPoint_t);
void filterCutPoints();
void applyCutPoints();
public: public:
CPPFImdlp(); CPPFImdlp();
CPPFImdlp(int, bool debug = false); CPPFImdlp(int, bool debug = false);
~CPPFImdlp(); ~CPPFImdlp();
std::vector<CutPointBody> getCutPoints(); std::vector<CutPoint_t> getCutPoints();
std::vector<float> getDiscretizedValues(); labels getDiscretizedValues();
void debugPoints(std::vector<float> &, std::vector<int> &); void debugPoints(samples&, labels&);
void fit(std::vector<float> &, std::vector<int> &); void fit(samples&, labels&);
std::vector<float> &transform(std::vector<float> &); labels& transform(samples&);
}; };
} }
#endif #endif

View File

@@ -1,40 +1,35 @@
#include "Metrics.h" #include "Metrics.h"
#include <set> #include <set>
namespace mdlp namespace mdlp {
{
Metrics::Metrics() Metrics::Metrics()
{ {
} }
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, size_t start, size_t end) int Metrics::numClasses(labels& y, indices_t indices, size_t start, size_t end)
{ {
std::set<int> numClasses; std::set<int> numClasses;
for (auto i = start; i < end; ++i) for (auto i = start; i < end; ++i) {
{
numClasses.insert(y[indices[i]]); numClasses.insert(y[indices[i]]);
} }
return numClasses.size(); return numClasses.size();
} }
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, int nClasses) float Metrics::entropy(labels& y, indices_t& indices, size_t start, size_t end, int nClasses)
{ {
float entropy = 0; float entropy = 0;
int nElements = 0; int nElements = 0;
std::vector<int> counts(nClasses + 1, 0); labels counts(nClasses + 1, 0);
for (auto i = &indices[start]; i != &indices[end]; ++i) for (auto i = &indices[start]; i != &indices[end]; ++i) {
{
counts[y[*i]]++; counts[y[*i]]++;
nElements++; nElements++;
} }
for (auto count : counts) for (auto count : counts) {
{ if (count > 0) {
if (count > 0)
{
float p = (float)count / nElements; float p = (float)count / nElements;
entropy -= p * log2(p); entropy -= p * log2(p);
} }
} }
return entropy; return entropy;
} }
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, size_t cutPoint, int nClasses) float Metrics::informationGain(labels& y, indices_t& indices, size_t start, size_t end, size_t cutPoint, int nClasses)
{ {
float iGain = 0.0; float iGain = 0.0;
float entropy, entropyLeft, entropyRight; float entropy, entropyLeft, entropyRight;

View File

@@ -1,16 +1,14 @@
#ifndef METRICS_H #ifndef METRICS_H
#define METRICS_H #define METRICS_H
#include <vector> #include "typesFImdlp.h"
#include <cmath> #include <cmath>
namespace mdlp namespace mdlp {
{ class Metrics {
class Metrics
{
public: public:
Metrics(); Metrics();
static int numClasses(std::vector<int> &, std::vector<size_t>, size_t, size_t); static int numClasses(labels&, indices_t, size_t, size_t);
static float entropy(std::vector<int> &, std::vector<size_t> &, size_t, size_t, int); static float entropy(labels&, indices_t&, size_t, size_t, int);
static float informationGain(std::vector<int> &, std::vector<size_t> &, size_t, size_t, size_t, int); static float informationGain(labels&, indices_t&, size_t, size_t, size_t, int);
}; };
} }
#endif #endif

View File

@@ -12,13 +12,13 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
CPPFImdlp() except + CPPFImdlp() except +
CPPFImdlp(int, bool) except + CPPFImdlp(int, bool) except +
void fit(vector[float]&, vector[int]&) void fit(vector[float]&, vector[int]&)
vector[float] transform(vector[float]&) vector[int] transform(vector[float]&)
vector[float] getDiscretizedValues() vector[int] getDiscretizedValues()
vector[CutPointBody] getCutPoints() vector[CutPointBody] getCutPoints()
void debugPoints(vector[float]&, vector[int]&) void debugPoints(vector[float]&, vector[int]&)
class PCutPointBody: class PCutPoint_t:
def __init__(self, start, end, fromValue, toValue): def __init__(self, start, end, fromValue, toValue):
self.start = start self.start = start
self.end = end self.end = end
@@ -37,7 +37,7 @@ cdef class CFImdlp:
return self.thisptr.transform(X) return self.thisptr.transform(X)
def get_discretized_values(self): def get_discretized_values(self):
return self.thisptr.getDiscretizedValues() return self.thisptr.getDiscretizedValues()
def get_cut_points(self, X, y): def get_cut_points(self):
return self.thisptr.getCutPoints() return self.thisptr.getCutPoints()
def debug_points(self, X, y): def debug_points(self, X, y):
return self.thisptr.debugPoints(X, y) return self.thisptr.debugPoints(X, y)

View File

@@ -1,39 +1,76 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "../Metrics.h"
#include "../CPPFImdlp.h" #include "../CPPFImdlp.h"
namespace namespace mdlp {
{ class TestMetrics : public CPPFImdlp, public testing::Test {
float precision = 0.000001;
class TestMetrics : protected mdlp::CPPFImdlp
{
public: public:
std::vector<size_t> testSort(std::vector<float> &X) //TestMetrics(samples X, labels y, indices_t indices) : X(X), y(y), indices(indices), CPPFImdlp(true) {}
indices_t indices; // sorted indices to use with X and y
samples X;
labels y;
samples xDiscretized;
int numClasses;
float precision_test = 0.000001;
void SetUp() override
{ {
return sortIndices(X); X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
numClasses = 2;
}
void check_sorted_vector(samples& X, indices_t indices)
{
this->X = X;
this->indices = indices;
indices_t testSortedIndices = sortIndices(X);
float prev = X[testSortedIndices[0]];
for (auto i = 0; i < X.size(); ++i) {
EXPECT_EQ(testSortedIndices[i], indices[i]);
EXPECT_LE(prev, X[testSortedIndices[i]]);
prev = X[testSortedIndices[i]];
}
}
std::vector<CutPoint_t> testCutPoints(samples& X, indices_t& indices, labels& y)
{
this->X = X;
this->y = y;
this->indices = indices;
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
//computeCutPoints();
return getCutPoints();
} }
}; };
void check_sorted_vector(std::vector<float> &X, std::vector<size_t> indices) //
TEST_F(TestMetrics, SortIndices)
{ {
TestMetrics testClass = TestMetrics(); samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
std::vector<size_t> testSortedIndices = testClass.testSort(X); indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
float prev = X[testSortedIndices[0]]; check_sorted_vector(X, indices);
for (auto i = 0; i < X.size(); ++i) X = { 5.77, 5.88, 5.99 };
{ indices = { 0, 1, 2 };
EXPECT_EQ(testSortedIndices[i], indices[i]); check_sorted_vector(X, indices);
EXPECT_LE(prev, X[testSortedIndices[i]]); X = { 5.33, 5.22, 5.11 };
prev = X[testSortedIndices[i]]; indices = { 2, 1, 0 };
check_sorted_vector(X, indices);
}
// TEST_F(TestMetrics, EvaluateCutPoint)
// {
// CutPoint_t rest, candidate;
// rest.start = 0;
// rest.end = 10;
// candidate.start = 0;
// candidate.end = 5;
// float computed = evaluateCutPoint(rest, candidate);
// ASSERT_NEAR(0.468996, computed, precision_test);
// }
TEST_F(TestMetrics, ComputeCutPoints)
{
std::vector<CutPoint_t> computed, expected;
computeCutPoints();
computed = getCutPoints();
for (auto cut : computed) {
std::cout << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << ")" << std::endl;
} }
} }
TEST(FImdlpTest, SortIndices)
{
std::vector<float> X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
std::vector<size_t> indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7};
check_sorted_vector(X, indices);
X = {5.77, 5.88, 5.99};
indices = {0, 1, 2};
check_sorted_vector(X, indices);
X = {5.33, 5.22, 5.11};
indices = {2, 1, 0};
check_sorted_vector(X, indices);
}
} }

View File

@@ -1,33 +1,31 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "../Metrics.h" #include "../Metrics.h"
namespace namespace mdlp {
{
float precision = 0.000001; float precision = 0.000001;
TEST(MetricTest, NumClasses) TEST(MetricTest, NumClasses)
{ {
std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; labels y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
EXPECT_EQ(1, mdlp::Metrics::numClasses(y, indices, 4, 8)); EXPECT_EQ(1, Metrics::numClasses(y, indices, 4, 8));
EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 0, 10)); EXPECT_EQ(2, Metrics::numClasses(y, indices, 0, 10));
EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 8, 10)); EXPECT_EQ(2, Metrics::numClasses(y, indices, 8, 10));
} }
TEST(MetricTest, Entropy) TEST(MetricTest, Entropy)
{ {
std::vector<int> y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
EXPECT_EQ(1, mdlp::Metrics::entropy(y, indices, 0, 10, 2)); EXPECT_EQ(1, Metrics::entropy(y, indices, 0, 10, 2));
EXPECT_EQ(0, mdlp::Metrics::entropy(y, indices, 0, 5, 1)); EXPECT_EQ(0, Metrics::entropy(y, indices, 0, 5, 1));
std::vector<int> yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
ASSERT_NEAR(0.468996, mdlp::Metrics::entropy(yz, indices, 0, 10, 2), precision); ASSERT_NEAR(0.468996, Metrics::entropy(yz, indices, 0, 10, 2), precision);
} }
TEST(MetricTest, InformationGain) TEST(MetricTest, InformationGain)
{ {
std::vector<int> y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
std::vector<int> yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
ASSERT_NEAR(1, mdlp::Metrics::informationGain(y, indices, 0, 10, 5, 2), precision); ASSERT_NEAR(1, Metrics::informationGain(y, indices, 0, 10, 5, 2), precision);
ASSERT_NEAR(0.108032, mdlp::Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision); ASSERT_NEAR(0.108032, Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision);
} }
} }

12
fimdlp/testcpp/test Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/sh
# Configure, build and run the C++ unit tests through CMake and CTest.
# Exits with status 1 when configuration, the build, or entering the build
# directory fails; otherwise the exit status is that of ctest.

# Generate the build tree in ./build (-Wno-dev silences CMake developer warnings).
if ! cmake -S . -B build -Wno-dev; then
    echo "Error in creating build commands."
    exit 1
fi
# Compile the test executables.
if ! cmake --build build; then
    echo "Error in build command."
    exit 1
fi
# ctest is run from inside the build tree; stop if the directory cannot be
# entered instead of running ctest somewhere else.
cd build || exit 1
ctest --output-on-failure

View File

@@ -1,4 +1,4 @@
cmake -S . -B build cmake -S . -B build -Wno-dev
if test $? -ne 0; then if test $? -ne 0; then
echo "Error in creating build commands." echo "Error in creating build commands."
exit 1 exit 1

15
fimdlp/typesFImdlp.h Normal file
View File

@@ -0,0 +1,15 @@
#ifndef TYPES_H
#define TYPES_H
#include <vector>
namespace mdlp {
    // Container aliases shared by the discretizer and the metrics helpers.
    using samples = std::vector<float>;     // values of one continuous variable
    using labels = std::vector<int>;        // integer class labels
    using indices_t = std::vector<size_t>;  // positions into a samples/labels vector

    // Description of one interval delimited by a cut point.
    struct CutPointBody {
        // Range of the interval, expressed as indices of the sorted vector.
        size_t start, end;
        // Class assigned to the cut point.
        int classNumber;
        // Values of the variable that bound the interval.
        float fromValue, toValue;
    };

    // Short name used throughout the code for CutPointBody.
    using CutPoint_t = CutPointBody;
}
#endif

View File

@@ -1,21 +1,18 @@
#include "FImdlp.h" #include "FImdlp.h"
namespace FImdlp namespace FImdlp {
{
FImdlp::FImdlp() FImdlp::FImdlp()
{ {
} }
FImdlp::~FImdlp() FImdlp::~FImdlp()
{ {
} }
std::vector<float> FImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y) samples FImdlp::cutPoints(labels& X, labels& y)
{ {
std::vector<float> cutPts; samples cutPts;
int i, ant = X.at(0); int i, ant = X.at(0);
int n = X.size(); int n = X.size();
for (i = 1; i < n; i++) for (i = 1; i < n; i++) {
{ if (X.at(i) != ant) {
if (X.at(i) != ant)
{
cutPts.push_back(float(X.at(i) + ant) / 2); cutPts.push_back(float(X.at(i) + ant) / 2);
ant = X.at(i); ant = X.at(i);
} }

View File

@@ -2,14 +2,12 @@
#define FIMDLP_H #define FIMDLP_H
#include <vector> #include <vector>
#include <Python.h> #include <Python.h>
namespace FImdlp namespace FImdlp {
{ class FImdlp {
class FImdlp
{
public: public:
FImdlp(); FImdlp();
~FImdlp(); ~FImdlp();
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &); samples cutPoints(labels&, labels&);
}; };
} }
#endif #endif

View File

@@ -18,13 +18,22 @@ test = CFImdlp(debug=False)
# k = test.cut_points_ant(X[:, 0], y) # k = test.cut_points_ant(X[:, 0], y)
# print(k) # print(k)
# test.debug_points(X[:, 0], y) # test.debug_points(X[:, 0], y)
result = test.cut_points(X[:, 0], y) X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# test.fit(X[:, 0], y)
test.fit(X, y)
result = test.get_cut_points()
for item in result: for item in result:
print( print(
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
) )
print(test.get_discretized_values())
# print(test.transform(X))
# print(X)
# print(indices)
# print(np.array(X)[indices])
# X = np.array( # X = np.array(
# [ # [