mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Refactor tests
This commit is contained in:
@@ -2,16 +2,9 @@
|
|||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <stdio.h>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
namespace mdlp
|
namespace mdlp {
|
||||||
{
|
|
||||||
std::ostream &operator<<(std::ostream &os, const CutPointBody &cut)
|
|
||||||
{
|
|
||||||
os << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << "]";
|
|
||||||
return os;
|
|
||||||
}
|
|
||||||
CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
|
CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
|
||||||
{
|
{
|
||||||
divider = pow(10, precision);
|
divider = pow(10, precision);
|
||||||
@@ -23,70 +16,125 @@ namespace mdlp
|
|||||||
CPPFImdlp::~CPPFImdlp()
|
CPPFImdlp::~CPPFImdlp()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
std::vector<CutPointBody> CPPFImdlp::getCutPoints()
|
std::vector<CutPoint_t> CPPFImdlp::getCutPoints()
|
||||||
{
|
{
|
||||||
return cutPoints;
|
return cutPoints;
|
||||||
}
|
}
|
||||||
std::vector<float> CPPFImdlp::getDiscretizedValues()
|
labels CPPFImdlp::getDiscretizedValues()
|
||||||
{
|
{
|
||||||
return xDiscretized;
|
return xDiscretized;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::fit(std::vector<float> &X, std::vector<int> &y)
|
void CPPFImdlp::fit(samples& X, labels& y)
|
||||||
{
|
{
|
||||||
this->X = X;
|
this->X = X;
|
||||||
this->y = y;
|
this->y = y;
|
||||||
this->indices = sortIndices(X);
|
this->indices = sortIndices(X);
|
||||||
|
this->xDiscretized = labels(X.size(), -1);
|
||||||
|
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
|
||||||
|
|
||||||
computeCutPoints();
|
computeCutPoints();
|
||||||
filterCutPoints();
|
filterCutPoints();
|
||||||
applyCutPoints();
|
applyCutPoints();
|
||||||
}
|
}
|
||||||
std::vector<float> &CPPFImdlp::transform(std::vector<float> &X)
|
labels& CPPFImdlp::transform(samples& X)
|
||||||
{
|
{
|
||||||
std::vector<size_t> indices_transform = sortIndices(X);
|
indices_t indices_transform = sortIndices(X);
|
||||||
applyCutPoints();
|
applyCutPoints();
|
||||||
return xDiscretized;
|
return xDiscretized;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y)
|
void CPPFImdlp::debugPoints(samples& X, labels& y)
|
||||||
{
|
{
|
||||||
std::cout << "+++++++++++++++++++++++" << std::endl;
|
std::cout << "+++++++++++++++++++++++" << std::endl;
|
||||||
// for (auto i : sortIndices(X))
|
// for (auto i : sortIndices(X))
|
||||||
std::vector<size_t> indices = sortIndices(X);
|
indices_t indices = sortIndices(X);
|
||||||
for (size_t i = 0; i < indices.size(); i++)
|
for (size_t i = 0; i < indices.size(); i++) {
|
||||||
{
|
|
||||||
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
|
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
|
||||||
}
|
}
|
||||||
std::cout << "+++++++++++++++++++++++" << std::endl;
|
std::cout << "+++++++++++++++++++++++" << std::endl;
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
for (auto item : cutPoints)
|
for (auto item : cutPoints) {
|
||||||
{
|
std::cout << item.start << " X[" << item.end << "]=" << X[item.end] << std::endl;
|
||||||
std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void CPPFImdlp::applyCutPoints()
|
void CPPFImdlp::applyCutPoints()
|
||||||
{
|
{
|
||||||
|
for (auto cut : cutPoints) {
|
||||||
|
for (size_t i = cut.start; i < cut.end; i++) {
|
||||||
|
xDiscretized[indices[i]] = cut.classNumber;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
bool CPPFImdlp::evaluateCutPoint(CutPointBody point)
|
bool CPPFImdlp::evaluateCutPoint(CutPoint_t rest, CutPoint_t candidate)
|
||||||
{
|
{
|
||||||
return true;
|
int k, k1, k2;
|
||||||
|
float ig, delta;
|
||||||
|
float ent, ent1, ent2;
|
||||||
|
float N = float(rest.end - rest.start);
|
||||||
|
if (N < 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
k = Metrics::numClasses(y, indices, rest.start, rest.end);
|
||||||
|
k1 = Metrics::numClasses(y, indices, rest.start, candidate.end);
|
||||||
|
k2 = Metrics::numClasses(y, indices, candidate.end, rest.end);
|
||||||
|
ent = Metrics::entropy(y, indices, rest.start, rest.end, numClasses);
|
||||||
|
ent1 = Metrics::entropy(y, indices, rest.start, candidate.end, numClasses);
|
||||||
|
ent2 = Metrics::entropy(y, indices, candidate.end, rest.end, numClasses);
|
||||||
|
ig = Metrics::informationGain(y, indices, rest.start, rest.end, candidate.end, numClasses);
|
||||||
|
delta = log2(pow(3, k) - 2) - (k * ent - k1 * ent1 - k2 * ent2);
|
||||||
|
float term = 1 / N * (log2(N - 1) + delta);
|
||||||
|
std::cout << candidate
|
||||||
|
std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl;
|
||||||
|
std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl;
|
||||||
|
return (ig > term);
|
||||||
}
|
}
|
||||||
void CPPFImdlp::filterCutPoints()
|
void CPPFImdlp::filterCutPoints()
|
||||||
{
|
{
|
||||||
std::vector<CutPointBody> filtered;
|
std::vector<CutPoint_t> filtered;
|
||||||
for (auto item : cutPoints)
|
CutPoint_t rest;
|
||||||
{
|
int classNumber = 0;
|
||||||
if (evaluateCutPoint(item))
|
|
||||||
{
|
rest.start = 0;
|
||||||
|
rest.end = X.size();
|
||||||
|
rest.fromValue = std::numeric_limits<float>::lowest();
|
||||||
|
rest.toValue = std::numeric_limits<float>::max();
|
||||||
|
rest.classNumber = classNumber;
|
||||||
|
bool lastReject = false, first = true;
|
||||||
|
for (auto item : cutPoints) {
|
||||||
|
if (evaluateCutPoint(rest, item)) {
|
||||||
|
std::cout << "Accepted" << std::endl;
|
||||||
|
if (lastReject) {
|
||||||
|
if (first) {
|
||||||
|
item.fromValue = std::numeric_limits<float>::lowest();
|
||||||
|
item.start = indices[0];
|
||||||
|
} else {
|
||||||
|
item.fromValue = filtered.back().toValue;
|
||||||
|
item.start = filtered.back().end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Assign class number to the interval (cutpoint)
|
||||||
|
item.classNumber = classNumber++;
|
||||||
filtered.push_back(item);
|
filtered.push_back(item);
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
std::cout << "Rejected" << std::endl;
|
||||||
|
lastReject = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!first)
|
||||||
|
filtered.back().toValue = std::numeric_limits<float>::max();
|
||||||
|
else {
|
||||||
|
filtered.push_back(rest);
|
||||||
|
}
|
||||||
|
|
||||||
cutPoints = filtered;
|
cutPoints = filtered;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPoints()
|
void CPPFImdlp::computeCutPoints()
|
||||||
{
|
{
|
||||||
|
|
||||||
std::vector<CutPointBody> cutPts;
|
std::vector<CutPoint_t> cutPts;
|
||||||
CutPointBody cutPoint;
|
CutPoint_t cutPoint;
|
||||||
std::vector<size_t> cutIdx;
|
indices_t cutIdx;
|
||||||
float xPrev, xCur, xPivot;
|
float xPrev, xCur, xPivot;
|
||||||
int yPrev, yCur, yPivot;
|
int yPrev, yCur, yPivot;
|
||||||
size_t idxPrev, idxPivot, idx, numElements, start;
|
size_t idxPrev, idxPivot, idx, numElements, start;
|
||||||
@@ -99,28 +147,25 @@ namespace mdlp
|
|||||||
bool firstCutPoint = true;
|
bool firstCutPoint = true;
|
||||||
if (debug)
|
if (debug)
|
||||||
printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements);
|
printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements);
|
||||||
while (idx < numElements)
|
while (idx < numElements) {
|
||||||
{
|
|
||||||
xPivot = xCur;
|
xPivot = xCur;
|
||||||
yPivot = yCur;
|
yPivot = yCur;
|
||||||
idxPivot = indices[idx];
|
idxPivot = indices[idx];
|
||||||
if (debug)
|
if (debug)
|
||||||
printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
||||||
// Read the same values and check class changes
|
// Read the same values and check class changes
|
||||||
do
|
do {
|
||||||
{
|
|
||||||
idx++;
|
idx++;
|
||||||
xCur = X[indices[idx]];
|
xCur = X[indices[idx]];
|
||||||
yCur = y[indices[idx]];
|
yCur = y[indices[idx]];
|
||||||
if (yCur != yPivot && xCur == xPivot)
|
if (yCur != yPivot && xCur == xPivot) {
|
||||||
{
|
|
||||||
yPivot = -1;
|
yPivot = -1;
|
||||||
}
|
}
|
||||||
if (debug)
|
if (debug)
|
||||||
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
||||||
} while (idx < numElements && xCur == xPivot);
|
}
|
||||||
if (yPivot == -1 || yPrev != yCur)
|
while (idx < numElements && xCur == xPivot);
|
||||||
{
|
if (yPivot == -1 || yPrev != yCur) {
|
||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = idx - 1;
|
cutPoint.end = idx - 1;
|
||||||
start = idx;
|
start = idx;
|
||||||
@@ -128,8 +173,7 @@ namespace mdlp
|
|||||||
cutPoint.toValue = (xPrev + xCur) / 2;
|
cutPoint.toValue = (xPrev + xCur) / 2;
|
||||||
cutPoint.classNumber = -1;
|
cutPoint.classNumber = -1;
|
||||||
firstCutPoint = false;
|
firstCutPoint = false;
|
||||||
if (debug)
|
if (debug) {
|
||||||
{
|
|
||||||
printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
||||||
}
|
}
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
@@ -139,8 +183,7 @@ namespace mdlp
|
|||||||
xPrev = xPivot;
|
xPrev = xPivot;
|
||||||
idxPrev = indices[idxPivot];
|
idxPrev = indices[idxPivot];
|
||||||
}
|
}
|
||||||
if (idx == numElements)
|
if (idx == numElements) {
|
||||||
{
|
|
||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = numElements;
|
cutPoint.end = numElements;
|
||||||
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
||||||
@@ -155,31 +198,27 @@ namespace mdlp
|
|||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPointsAnt()
|
void CPPFImdlp::computeCutPointsAnt()
|
||||||
{
|
{
|
||||||
std::vector<float> cutPts;
|
samples cutPts;
|
||||||
std::vector<int> cutIdx;
|
labels cutIdx;
|
||||||
float xPrev, cutPoint;
|
float xPrev, cutPoint;
|
||||||
int yPrev;
|
int yPrev;
|
||||||
size_t idxPrev;
|
size_t idxPrev;
|
||||||
xPrev = X.at(indices[0]);
|
xPrev = X.at(indices[0]);
|
||||||
yPrev = y.at(indices[0]);
|
yPrev = y.at(indices[0]);
|
||||||
idxPrev = indices[0];
|
idxPrev = indices[0];
|
||||||
if (debug)
|
if (debug) {
|
||||||
{
|
|
||||||
std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl;
|
std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl;
|
||||||
}
|
}
|
||||||
for (auto index = indices.begin(); index != indices.end(); ++index)
|
for (auto index = indices.begin(); index != indices.end(); ++index) {
|
||||||
{
|
|
||||||
// Definition 2 Cut points are always on boundaries
|
// Definition 2 Cut points are always on boundaries
|
||||||
if (y.at(*index) != yPrev && xPrev < X.at(*index))
|
if (y.at(*index) != yPrev && xPrev < X.at(*index)) {
|
||||||
{
|
|
||||||
cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
|
cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
|
||||||
if (debug)
|
if (debug) {
|
||||||
{
|
|
||||||
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
||||||
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
|
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
|
||||||
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")="
|
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")="
|
||||||
<< ((X.at(*index) + xPrev) / 2) << "idxPrev"
|
<< ((X.at(*index) + xPrev) / 2) << "idxPrev"
|
||||||
<< idxPrev << std::endl;
|
<< idxPrev << std::endl;
|
||||||
}
|
}
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
cutIdx.push_back(idxPrev);
|
cutIdx.push_back(idxPrev);
|
||||||
@@ -191,13 +230,13 @@ namespace mdlp
|
|||||||
// cutPoints = cutPts;
|
// cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
|
indices_t CPPFImdlp::sortIndices(samples& X)
|
||||||
{
|
{
|
||||||
std::vector<size_t> idx(X.size());
|
indices_t idx(X.size());
|
||||||
std::iota(idx.begin(), idx.end(), 0);
|
std::iota(idx.begin(), idx.end(), 0);
|
||||||
for (std::size_t i = 0; i < X.size(); i++)
|
for (std::size_t i = 0; i < X.size(); i++)
|
||||||
stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
|
stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
|
||||||
{ return X[i1] < X[i2]; });
|
{ return X[i1] < X[i2]; });
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,44 +1,37 @@
|
|||||||
#ifndef CPPFIMDLP_H
|
#ifndef CPPFIMDLP_H
|
||||||
#define CPPFIMDLP_H
|
#define CPPFIMDLP_H
|
||||||
#include <vector>
|
#include "typesFImdlp.h"
|
||||||
#include <utility>
|
#include <utility>
|
||||||
namespace mdlp
|
namespace mdlp {
|
||||||
{
|
class CPPFImdlp {
|
||||||
struct CutPointBody
|
|
||||||
{
|
|
||||||
size_t start, end; // indices of the sorted vector
|
|
||||||
int classNumber; // class assigned to the cut point
|
|
||||||
float fromValue, toValue; // Values of the variable
|
|
||||||
};
|
|
||||||
class CPPFImdlp
|
|
||||||
{
|
|
||||||
private:
|
private:
|
||||||
bool debug;
|
bool debug;
|
||||||
int precision;
|
int precision;
|
||||||
float divider;
|
float divider;
|
||||||
std::vector<size_t> indices; // sorted indices to use with X and y
|
indices_t indices; // sorted indices to use with X and y
|
||||||
std::vector<float> X;
|
samples X;
|
||||||
std::vector<int> y;
|
labels y;
|
||||||
std::vector<float> xDiscretized;
|
labels xDiscretized;
|
||||||
std::vector<CutPointBody> cutPoints;
|
int numClasses;
|
||||||
|
std::vector<CutPoint_t> cutPoints;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::vector<size_t> sortIndices(std::vector<float> &);
|
indices_t sortIndices(samples&);
|
||||||
bool evaluateCutPoint(CutPointBody);
|
|
||||||
void filterCutPoints();
|
|
||||||
void computeCutPoints();
|
|
||||||
void applyCutPoints();
|
|
||||||
void computeCutPointsAnt();
|
void computeCutPointsAnt();
|
||||||
|
void computeCutPoints();
|
||||||
|
bool evaluateCutPoint(CutPoint_t, CutPoint_t);
|
||||||
|
void filterCutPoints();
|
||||||
|
void applyCutPoints();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CPPFImdlp();
|
CPPFImdlp();
|
||||||
CPPFImdlp(int, bool debug = false);
|
CPPFImdlp(int, bool debug = false);
|
||||||
~CPPFImdlp();
|
~CPPFImdlp();
|
||||||
std::vector<CutPointBody> getCutPoints();
|
std::vector<CutPoint_t> getCutPoints();
|
||||||
std::vector<float> getDiscretizedValues();
|
labels getDiscretizedValues();
|
||||||
void debugPoints(std::vector<float> &, std::vector<int> &);
|
void debugPoints(samples&, labels&);
|
||||||
void fit(std::vector<float> &, std::vector<int> &);
|
void fit(samples&, labels&);
|
||||||
std::vector<float> &transform(std::vector<float> &);
|
labels& transform(samples&);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
@@ -1,40 +1,35 @@
|
|||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
namespace mdlp
|
namespace mdlp {
|
||||||
{
|
|
||||||
Metrics::Metrics()
|
Metrics::Metrics()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, size_t start, size_t end)
|
int Metrics::numClasses(labels& y, indices_t indices, size_t start, size_t end)
|
||||||
{
|
{
|
||||||
std::set<int> numClasses;
|
std::set<int> numClasses;
|
||||||
for (auto i = start; i < end; ++i)
|
for (auto i = start; i < end; ++i) {
|
||||||
{
|
|
||||||
numClasses.insert(y[indices[i]]);
|
numClasses.insert(y[indices[i]]);
|
||||||
}
|
}
|
||||||
return numClasses.size();
|
return numClasses.size();
|
||||||
}
|
}
|
||||||
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, int nClasses)
|
float Metrics::entropy(labels& y, indices_t& indices, size_t start, size_t end, int nClasses)
|
||||||
{
|
{
|
||||||
float entropy = 0;
|
float entropy = 0;
|
||||||
int nElements = 0;
|
int nElements = 0;
|
||||||
std::vector<int> counts(nClasses + 1, 0);
|
labels counts(nClasses + 1, 0);
|
||||||
for (auto i = &indices[start]; i != &indices[end]; ++i)
|
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||||
{
|
|
||||||
counts[y[*i]]++;
|
counts[y[*i]]++;
|
||||||
nElements++;
|
nElements++;
|
||||||
}
|
}
|
||||||
for (auto count : counts)
|
for (auto count : counts) {
|
||||||
{
|
if (count > 0) {
|
||||||
if (count > 0)
|
|
||||||
{
|
|
||||||
float p = (float)count / nElements;
|
float p = (float)count / nElements;
|
||||||
entropy -= p * log2(p);
|
entropy -= p * log2(p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return entropy;
|
return entropy;
|
||||||
}
|
}
|
||||||
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, size_t cutPoint, int nClasses)
|
float Metrics::informationGain(labels& y, indices_t& indices, size_t start, size_t end, size_t cutPoint, int nClasses)
|
||||||
{
|
{
|
||||||
float iGain = 0.0;
|
float iGain = 0.0;
|
||||||
float entropy, entropyLeft, entropyRight;
|
float entropy, entropyLeft, entropyRight;
|
||||||
|
@@ -1,16 +1,14 @@
|
|||||||
#ifndef METRICS_H
|
#ifndef METRICS_H
|
||||||
#define METRICS_H
|
#define METRICS_H
|
||||||
#include <vector>
|
#include "typesFImdlp.h"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
namespace mdlp
|
namespace mdlp {
|
||||||
{
|
class Metrics {
|
||||||
class Metrics
|
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
Metrics();
|
Metrics();
|
||||||
static int numClasses(std::vector<int> &, std::vector<size_t>, size_t, size_t);
|
static int numClasses(labels&, indices_t, size_t, size_t);
|
||||||
static float entropy(std::vector<int> &, std::vector<size_t> &, size_t, size_t, int);
|
static float entropy(labels&, indices_t&, size_t, size_t, int);
|
||||||
static float informationGain(std::vector<int> &, std::vector<size_t> &, size_t, size_t, size_t, int);
|
static float informationGain(labels&, indices_t&, size_t, size_t, size_t, int);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
@@ -12,13 +12,13 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
|
|||||||
CPPFImdlp() except +
|
CPPFImdlp() except +
|
||||||
CPPFImdlp(int, bool) except +
|
CPPFImdlp(int, bool) except +
|
||||||
void fit(vector[float]&, vector[int]&)
|
void fit(vector[float]&, vector[int]&)
|
||||||
vector[float] transform(vector[float]&)
|
vector[int] transform(vector[float]&)
|
||||||
vector[float] getDiscretizedValues()
|
vector[int] getDiscretizedValues()
|
||||||
vector[CutPointBody] getCutPoints()
|
vector[CutPointBody] getCutPoints()
|
||||||
void debugPoints(vector[float]&, vector[int]&)
|
void debugPoints(vector[float]&, vector[int]&)
|
||||||
|
|
||||||
|
|
||||||
class PCutPointBody:
|
class PCutPoint_t:
|
||||||
def __init__(self, start, end, fromValue, toValue):
|
def __init__(self, start, end, fromValue, toValue):
|
||||||
self.start = start
|
self.start = start
|
||||||
self.end = end
|
self.end = end
|
||||||
@@ -37,7 +37,7 @@ cdef class CFImdlp:
|
|||||||
return self.thisptr.transform(X)
|
return self.thisptr.transform(X)
|
||||||
def get_discretized_values(self):
|
def get_discretized_values(self):
|
||||||
return self.thisptr.getDiscretizedValues()
|
return self.thisptr.getDiscretizedValues()
|
||||||
def get_cut_points(self, X, y):
|
def get_cut_points(self):
|
||||||
return self.thisptr.getCutPoints()
|
return self.thisptr.getCutPoints()
|
||||||
def debug_points(self, X, y):
|
def debug_points(self, X, y):
|
||||||
return self.thisptr.debugPoints(X, y)
|
return self.thisptr.debugPoints(X, y)
|
||||||
|
Binary file not shown.
@@ -1,39 +1,76 @@
|
|||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
#include "../Metrics.h"
|
||||||
#include "../CPPFImdlp.h"
|
#include "../CPPFImdlp.h"
|
||||||
namespace
|
namespace mdlp {
|
||||||
{
|
class TestMetrics : public CPPFImdlp, public testing::Test {
|
||||||
float precision = 0.000001;
|
|
||||||
class TestMetrics : protected mdlp::CPPFImdlp
|
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
std::vector<size_t> testSort(std::vector<float> &X)
|
//TestMetrics(samples X, labels y, indices_t indices) : X(X), y(y), indices(indices), CPPFImdlp(true) {}
|
||||||
|
indices_t indices; // sorted indices to use with X and y
|
||||||
|
samples X;
|
||||||
|
labels y;
|
||||||
|
samples xDiscretized;
|
||||||
|
int numClasses;
|
||||||
|
float precision_test = 0.000001;
|
||||||
|
void SetUp() override
|
||||||
{
|
{
|
||||||
return sortIndices(X);
|
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
|
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||||
|
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
|
numClasses = 2;
|
||||||
|
}
|
||||||
|
void check_sorted_vector(samples& X, indices_t indices)
|
||||||
|
{
|
||||||
|
this->X = X;
|
||||||
|
this->indices = indices;
|
||||||
|
indices_t testSortedIndices = sortIndices(X);
|
||||||
|
float prev = X[testSortedIndices[0]];
|
||||||
|
for (auto i = 0; i < X.size(); ++i) {
|
||||||
|
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||||
|
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||||
|
prev = X[testSortedIndices[i]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::vector<CutPoint_t> testCutPoints(samples& X, indices_t& indices, labels& y)
|
||||||
|
{
|
||||||
|
this->X = X;
|
||||||
|
this->y = y;
|
||||||
|
this->indices = indices;
|
||||||
|
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
|
||||||
|
|
||||||
|
//computeCutPoints();
|
||||||
|
return getCutPoints();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
void check_sorted_vector(std::vector<float> &X, std::vector<size_t> indices)
|
//
|
||||||
|
TEST_F(TestMetrics, SortIndices)
|
||||||
{
|
{
|
||||||
TestMetrics testClass = TestMetrics();
|
samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
std::vector<size_t> testSortedIndices = testClass.testSort(X);
|
indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||||
float prev = X[testSortedIndices[0]];
|
check_sorted_vector(X, indices);
|
||||||
for (auto i = 0; i < X.size(); ++i)
|
X = { 5.77, 5.88, 5.99 };
|
||||||
{
|
indices = { 0, 1, 2 };
|
||||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
check_sorted_vector(X, indices);
|
||||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
X = { 5.33, 5.22, 5.11 };
|
||||||
prev = X[testSortedIndices[i]];
|
indices = { 2, 1, 0 };
|
||||||
|
check_sorted_vector(X, indices);
|
||||||
|
}
|
||||||
|
// TEST_F(TestMetrics, EvaluateCutPoint)
|
||||||
|
// {
|
||||||
|
// CutPoint_t rest, candidate;
|
||||||
|
// rest.start = 0;
|
||||||
|
// rest.end = 10;
|
||||||
|
// candidate.start = 0;
|
||||||
|
// candidate.end = 5;
|
||||||
|
// float computed = evaluateCutPoint(rest, candidate);
|
||||||
|
// ASSERT_NEAR(0.468996, computed, precision_test);
|
||||||
|
// }
|
||||||
|
TEST_F(TestMetrics, ComputeCutPoints)
|
||||||
|
{
|
||||||
|
std::vector<CutPoint_t> computed, expected;
|
||||||
|
computeCutPoints();
|
||||||
|
computed = getCutPoints();
|
||||||
|
for (auto cut : computed) {
|
||||||
|
std::cout << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ", " << cut.toValue << ")" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST(FImdlpTest, SortIndices)
|
|
||||||
{
|
|
||||||
|
|
||||||
std::vector<float> X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
|
|
||||||
std::vector<size_t> indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7};
|
|
||||||
check_sorted_vector(X, indices);
|
|
||||||
X = {5.77, 5.88, 5.99};
|
|
||||||
indices = {0, 1, 2};
|
|
||||||
check_sorted_vector(X, indices);
|
|
||||||
X = {5.33, 5.22, 5.11};
|
|
||||||
indices = {2, 1, 0};
|
|
||||||
check_sorted_vector(X, indices);
|
|
||||||
}
|
|
||||||
}
|
}
|
@@ -1,33 +1,31 @@
|
|||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include "../Metrics.h"
|
#include "../Metrics.h"
|
||||||
|
|
||||||
namespace
|
namespace mdlp {
|
||||||
{
|
|
||||||
|
|
||||||
float precision = 0.000001;
|
float precision = 0.000001;
|
||||||
TEST(MetricTest, NumClasses)
|
TEST(MetricTest, NumClasses)
|
||||||
{
|
{
|
||||||
std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
|
labels y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||||
std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||||
EXPECT_EQ(1, mdlp::Metrics::numClasses(y, indices, 4, 8));
|
EXPECT_EQ(1, Metrics::numClasses(y, indices, 4, 8));
|
||||||
EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 0, 10));
|
EXPECT_EQ(2, Metrics::numClasses(y, indices, 0, 10));
|
||||||
EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 8, 10));
|
EXPECT_EQ(2, Metrics::numClasses(y, indices, 8, 10));
|
||||||
}
|
}
|
||||||
TEST(MetricTest, Entropy)
|
TEST(MetricTest, Entropy)
|
||||||
{
|
{
|
||||||
std::vector<int> y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||||
EXPECT_EQ(1, mdlp::Metrics::entropy(y, indices, 0, 10, 2));
|
EXPECT_EQ(1, Metrics::entropy(y, indices, 0, 10, 2));
|
||||||
EXPECT_EQ(0, mdlp::Metrics::entropy(y, indices, 0, 5, 1));
|
EXPECT_EQ(0, Metrics::entropy(y, indices, 0, 5, 1));
|
||||||
std::vector<int> yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
|
labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||||
ASSERT_NEAR(0.468996, mdlp::Metrics::entropy(yz, indices, 0, 10, 2), precision);
|
ASSERT_NEAR(0.468996, Metrics::entropy(yz, indices, 0, 10, 2), precision);
|
||||||
}
|
}
|
||||||
TEST(MetricTest, InformationGain)
|
TEST(MetricTest, InformationGain)
|
||||||
{
|
{
|
||||||
std::vector<int> y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||||
std::vector<int> yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
|
labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||||
ASSERT_NEAR(1, mdlp::Metrics::informationGain(y, indices, 0, 10, 5, 2), precision);
|
ASSERT_NEAR(1, Metrics::informationGain(y, indices, 0, 10, 5, 2), precision);
|
||||||
ASSERT_NEAR(0.108032, mdlp::Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision);
|
ASSERT_NEAR(0.108032, Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
12
fimdlp/testcpp/test
Executable file
12
fimdlp/testcpp/test
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
cmake -S . -B build -Wno-dev
|
||||||
|
if test $? -ne 0; then
|
||||||
|
echo "Error in creating build commands."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
cmake --build build
|
||||||
|
if test $? -ne 0; then
|
||||||
|
echo "Error in build command."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
cd build
|
||||||
|
ctest --output-on-failure
|
@@ -1,4 +1,4 @@
|
|||||||
cmake -S . -B build
|
cmake -S . -B build -Wno-dev
|
||||||
if test $? -ne 0; then
|
if test $? -ne 0; then
|
||||||
echo "Error in creating build commands."
|
echo "Error in creating build commands."
|
||||||
exit 1
|
exit 1
|
||||||
|
15
fimdlp/typesFImdlp.h
Normal file
15
fimdlp/typesFImdlp.h
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#ifndef TYPES_H
|
||||||
|
#define TYPES_H
|
||||||
|
#include <vector>
|
||||||
|
namespace mdlp {
|
||||||
|
typedef std::vector<float> samples;
|
||||||
|
typedef std::vector<int> labels;
|
||||||
|
typedef std::vector<size_t> indices_t;
|
||||||
|
struct CutPointBody {
|
||||||
|
size_t start, end; // indices of the sorted vector
|
||||||
|
int classNumber; // class assigned to the cut point
|
||||||
|
float fromValue, toValue;
|
||||||
|
};
|
||||||
|
typedef CutPointBody CutPoint_t;
|
||||||
|
}
|
||||||
|
#endif
|
@@ -1,21 +1,18 @@
|
|||||||
#include "FImdlp.h"
|
#include "FImdlp.h"
|
||||||
namespace FImdlp
|
namespace FImdlp {
|
||||||
{
|
|
||||||
FImdlp::FImdlp()
|
FImdlp::FImdlp()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
FImdlp::~FImdlp()
|
FImdlp::~FImdlp()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
std::vector<float> FImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
|
samples FImdlp::cutPoints(labels& X, labels& y)
|
||||||
{
|
{
|
||||||
std::vector<float> cutPts;
|
samples cutPts;
|
||||||
int i, ant = X.at(0);
|
int i, ant = X.at(0);
|
||||||
int n = X.size();
|
int n = X.size();
|
||||||
for (i = 1; i < n; i++)
|
for (i = 1; i < n; i++) {
|
||||||
{
|
if (X.at(i) != ant) {
|
||||||
if (X.at(i) != ant)
|
|
||||||
{
|
|
||||||
cutPts.push_back(float(X.at(i) + ant) / 2);
|
cutPts.push_back(float(X.at(i) + ant) / 2);
|
||||||
ant = X.at(i);
|
ant = X.at(i);
|
||||||
}
|
}
|
||||||
|
@@ -2,14 +2,12 @@
|
|||||||
#define FIMDLP_H
|
#define FIMDLP_H
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
namespace FImdlp
|
namespace FImdlp {
|
||||||
{
|
class FImdlp {
|
||||||
class FImdlp
|
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
FImdlp();
|
FImdlp();
|
||||||
~FImdlp();
|
~FImdlp();
|
||||||
std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
|
samples cutPoints(labels&, labels&);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
13
sample.py
13
sample.py
@@ -18,13 +18,22 @@ test = CFImdlp(debug=False)
|
|||||||
# k = test.cut_points_ant(X[:, 0], y)
|
# k = test.cut_points_ant(X[:, 0], y)
|
||||||
# print(k)
|
# print(k)
|
||||||
# test.debug_points(X[:, 0], y)
|
# test.debug_points(X[:, 0], y)
|
||||||
result = test.cut_points(X[:, 0], y)
|
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||||
|
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||||
|
y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||||
|
# test.fit(X[:, 0], y)
|
||||||
|
test.fit(X, y)
|
||||||
|
result = test.get_cut_points()
|
||||||
for item in result:
|
for item in result:
|
||||||
print(
|
print(
|
||||||
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||||
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||||
)
|
)
|
||||||
|
print(test.get_discretized_values())
|
||||||
|
# print(test.transform(X))
|
||||||
|
# print(X)
|
||||||
|
# print(indices)
|
||||||
|
# print(np.array(X)[indices])
|
||||||
|
|
||||||
# X = np.array(
|
# X = np.array(
|
||||||
# [
|
# [
|
||||||
|
Reference in New Issue
Block a user