mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-17 16:35:57 +00:00
Refactor testing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -34,3 +34,4 @@
|
|||||||
**/lcoverage
|
**/lcoverage
|
||||||
.idea
|
.idea
|
||||||
cmake-*
|
cmake-*
|
||||||
|
**/CMakeFiles
|
||||||
|
209
CPPFImdlp.cpp
209
CPPFImdlp.cpp
@@ -4,9 +4,10 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include "CPPFImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
|
// OJO QUITAR ESTO
|
||||||
|
#include <iostream>
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
CPPFImdlp::CPPFImdlp(int proposal):proposal(proposal), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
CPPFImdlp::~CPPFImdlp()
|
CPPFImdlp::~CPPFImdlp()
|
||||||
@@ -23,14 +24,14 @@ namespace mdlp {
|
|||||||
if (X.size() == 0 || y.size() == 0) {
|
if (X.size() == 0 || y.size() == 0) {
|
||||||
throw invalid_argument("X and y must have at least one element");
|
throw invalid_argument("X and y must have at least one element");
|
||||||
}
|
}
|
||||||
indices = sortIndices(X_);
|
indices = sortIndices2(X_, y_);
|
||||||
metrics.setData(y, indices);
|
metrics.setData(y, indices);
|
||||||
switch (proposal) {
|
switch (algorithm) {
|
||||||
case 0:
|
case 0:
|
||||||
computeCutPoints(0, X.size());
|
computeCutPoints(0, X.size());
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
computeCutPointsProposal();
|
computeCutPointsProposal(0, X.size());
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
computeCutPointsAlternative(0, X.size());
|
computeCutPointsAlternative(0, X.size());
|
||||||
@@ -38,78 +39,169 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
precision_t CPPFImdlp::value_cut_point(size_t start, size_t idx)
|
||||||
|
{
|
||||||
|
size_t idxPrev = idx - 1;
|
||||||
|
precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]];
|
||||||
|
// definition 2 of the paper => X[t-1] < X[t]
|
||||||
|
while (idxPrev-- > start && actual == previous) {
|
||||||
|
previous = X[indices[idxPrev]];
|
||||||
|
}
|
||||||
|
return (previous + actual) / 2;
|
||||||
|
}
|
||||||
|
tuple<precision_t, size_t> CPPFImdlp::value_proposal_cut_point(size_t start, size_t cut, size_t end)
|
||||||
|
{
|
||||||
|
size_t idxPrev = cut - 1;
|
||||||
|
precision_t previous, next, actual;
|
||||||
|
previous = X[indices[idxPrev]];
|
||||||
|
next = actual = X[indices[cut]];
|
||||||
|
// definition 2 of the paper => X[t-1] < X[t]
|
||||||
|
while (idxPrev-- > start && actual == previous) {
|
||||||
|
previous = X[indices[idxPrev]];
|
||||||
|
}
|
||||||
|
// get the last equal value of X in the interval
|
||||||
|
while (actual == X[indices[cut++]] && cut < end);
|
||||||
|
if (previous == actual && cut < end)
|
||||||
|
actual = X[indices[cut]];
|
||||||
|
cut--;
|
||||||
|
return make_tuple((previous + actual) / 2, cut);
|
||||||
|
}
|
||||||
|
// void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||||
|
// {
|
||||||
|
// size_t cut;
|
||||||
|
// if (end - start < 2)
|
||||||
|
// return;
|
||||||
|
// cut = getCandidate(start, end);
|
||||||
|
// if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
|
||||||
|
// // cut == max means that there is no candidate in the interval
|
||||||
|
// // No boundary found, so we add both ends of the interval as cutpoints
|
||||||
|
// // because they were selected by the algorithm before
|
||||||
|
// if (start != 0)
|
||||||
|
// cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
|
||||||
|
// if (end != X.size())
|
||||||
|
// cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
|
||||||
|
// //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
// // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
// computeCutPoints(start, cut);
|
||||||
|
// computeCutPoints(cut, end);
|
||||||
|
// }
|
||||||
|
// void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||||
|
// {
|
||||||
|
// size_t cut;
|
||||||
|
// if (end - start < 2)
|
||||||
|
// return;
|
||||||
|
// cut = getCandidate(start, end);
|
||||||
|
// if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
|
||||||
|
// // cut == max means that there is no candidate in the interval
|
||||||
|
// // No boundary found, so we add both ends of the interval as cutpoints
|
||||||
|
// // because they were selected by the algorithm before
|
||||||
|
// if (start != 0)
|
||||||
|
// cutPoints.push_back(value_cut_point(0, start));
|
||||||
|
// if (end != X.size())
|
||||||
|
// cutPoints.push_back(value_cut_point(start, end));
|
||||||
|
// //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
// // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
// computeCutPointsAlternative(start, cut);
|
||||||
|
// computeCutPointsAlternative(cut, end);
|
||||||
|
// }
|
||||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
int cut;
|
size_t cut;
|
||||||
if (end - start < 2)
|
if (end - start < 2)
|
||||||
return;
|
return;
|
||||||
cut = getCandidate(start, end);
|
cut = getCandidate(start, end);
|
||||||
if (cut == -1 || !mdlp(start, cut, end)) {
|
if (cut == numeric_limits<size_t>::max())
|
||||||
// cut.value == -1 means that there is no candidate in the interval
|
|
||||||
// No boundary found, so we add both ends of the interval as cutpoints
|
|
||||||
// because they were selected by the algorithm before
|
|
||||||
if (start != 0)
|
|
||||||
cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
|
|
||||||
if (end != X.size())
|
|
||||||
cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
|
|
||||||
return;
|
return;
|
||||||
|
if (mdlp(start, cut, end)) {
|
||||||
|
cutPoints.push_back(value_cut_point(start, cut));
|
||||||
|
//cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
}
|
}
|
||||||
computeCutPoints(start, cut);
|
computeCutPoints(start, cut);
|
||||||
computeCutPoints(cut, end);
|
computeCutPoints(cut, end);
|
||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
precision_t cut;
|
size_t cut;
|
||||||
if (end - start < 2)
|
if (end - start < 2)
|
||||||
return;
|
return;
|
||||||
cut = getCandidate(start, end);
|
cut = getCandidate(start, end);
|
||||||
if (cut == -1)
|
if (cut == numeric_limits<size_t>::max())
|
||||||
return;
|
return;
|
||||||
if (mdlp(start, cut, end)) {
|
if (mdlp(start, cut, end)) {
|
||||||
cutPoints.push_back((X[indices[cut]] + X[indices[cut - 1]]) / 2);
|
cutPoints.push_back(value_cut_point(start, cut));
|
||||||
|
//cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
computeCutPointsAlternative(start, cut);
|
||||||
|
computeCutPointsAlternative(cut, end);
|
||||||
}
|
}
|
||||||
computeCutPointsAlternative(start, cut);
|
|
||||||
computeCutPointsAlternative(cut, end);
|
|
||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPointsProposal()
|
// void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||||
|
// {
|
||||||
|
// size_t cut;
|
||||||
|
// if (end - start < 2)
|
||||||
|
// return;
|
||||||
|
// cut = getCandidateWeka(start, end);
|
||||||
|
// if (cut == numeric_limits<size_t>::max())
|
||||||
|
// return;
|
||||||
|
// if (mdlp(start, cut, end)) {
|
||||||
|
// cutPoints.push_back(value_cut_point(start, cut));
|
||||||
|
// //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
// }
|
||||||
|
// computeCutPointsAlternative(start, cut);
|
||||||
|
// computeCutPointsAlternative(cut, end);
|
||||||
|
// }
|
||||||
|
void CPPFImdlp::computeCutPointsProposal(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
precision_t xPrev, xCur, xPivot, cutPoint;
|
size_t cut;
|
||||||
int yPrev, yCur, yPivot;
|
tuple<precision_t, size_t> result;
|
||||||
size_t idx, numElements, start;
|
if (end - start < 2)
|
||||||
|
return;
|
||||||
|
cut = getCandidate(start, end);
|
||||||
|
if (cut == numeric_limits<size_t>::max())
|
||||||
|
return;
|
||||||
|
if (mdlp(start, cut, end)) {
|
||||||
|
//cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
result = value_proposal_cut_point(start, cut, end);
|
||||||
|
cut = get<1>(result);
|
||||||
|
cutPoints.push_back(get<0>(result));
|
||||||
|
//cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||||
|
computeCutPointsProposal(start, cut);
|
||||||
|
computeCutPointsProposal(cut, end);
|
||||||
|
}
|
||||||
|
|
||||||
xCur = xPrev = X[indices[0]];
|
}
|
||||||
yCur = yPrev = y[indices[0]];
|
size_t CPPFImdlp::getCandidateWeka(size_t start, size_t end)
|
||||||
numElements = indices.size() - 1;
|
{
|
||||||
idx = start = 0;
|
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||||
while (idx < numElements) {
|
E(A, TA; S) is minimal amogst all the candidate cut points. */
|
||||||
xPivot = xCur;
|
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||||
yPivot = yCur;
|
precision_t entropy_left, entropy_right, minEntropy;
|
||||||
// Read the same values and check class changes
|
minEntropy = metrics.entropy(start, end);
|
||||||
do {
|
for (auto idx = start + 1; idx < end; idx++) {
|
||||||
idx++;
|
// Cutpoints are always on boundaries (definition 2)
|
||||||
xCur = X[indices[idx]];
|
if (X[indices[idx - 1]] < X[indices[idx]]) {
|
||||||
yCur = y[indices[idx]];
|
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||||
if (yCur != yPivot && xCur == xPivot) {
|
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
|
||||||
yPivot = -1;
|
if (entropy_left + entropy_right < minEntropy) {
|
||||||
|
minEntropy = entropy_left + entropy_right;
|
||||||
|
candidate = idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (idx < numElements && xCur == xPivot);
|
|
||||||
// Check if the class changed and there are more than 1 element
|
|
||||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && mdlp(start, idx, indices.size())) {
|
|
||||||
start = idx;
|
|
||||||
cutPoint = (xPrev + xCur) / 2;
|
|
||||||
cutPoints.push_back(cutPoint);
|
|
||||||
}
|
|
||||||
yPrev = yPivot;
|
|
||||||
xPrev = xPivot;
|
|
||||||
}
|
}
|
||||||
|
return candidate;
|
||||||
}
|
}
|
||||||
long int CPPFImdlp::getCandidate(size_t start, size_t end)
|
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
long int candidate = -1, elements = end - start;
|
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||||
precision_t entropy_left, entropy_right, minEntropy = numeric_limits<precision_t>::max();
|
E(A, TA; S) is minimal amogst all the candidate cut points. */
|
||||||
|
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||||
|
precision_t entropy_left, entropy_right, minEntropy;
|
||||||
|
minEntropy = metrics.entropy(start, end);
|
||||||
for (auto idx = start + 1; idx < end; idx++) {
|
for (auto idx = start + 1; idx < end; idx++) {
|
||||||
// Cutpoints are always on boundaries
|
// Cutpoints are always on boundaries (definition 2)
|
||||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||||
continue;
|
continue;
|
||||||
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||||
@@ -137,9 +229,9 @@ namespace mdlp {
|
|||||||
ent1 = metrics.entropy(start, cut);
|
ent1 = metrics.entropy(start, cut);
|
||||||
ent2 = metrics.entropy(cut, end);
|
ent2 = metrics.entropy(cut, end);
|
||||||
ig = metrics.informationGain(start, cut, end);
|
ig = metrics.informationGain(start, cut, end);
|
||||||
delta = log2(pow(3, precision_t(k)) - 2) -
|
delta = log(pow(3, precision_t(k)) - 2) -
|
||||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
||||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
precision_t term = 1 / N * (log(N - 1) + delta);
|
||||||
return ig > term;
|
return ig > term;
|
||||||
}
|
}
|
||||||
cutPoints_t CPPFImdlp::getCutPoints()
|
cutPoints_t CPPFImdlp::getCutPoints()
|
||||||
@@ -164,4 +256,17 @@ namespace mdlp {
|
|||||||
{ return X_[i1] < X_[i2]; });
|
{ return X_[i1] < X_[i2]; });
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
indices_t CPPFImdlp::sortIndices2(samples_t& X_, labels_t& y_)
|
||||||
|
{
|
||||||
|
indices_t idx(X_.size());
|
||||||
|
iota(idx.begin(), idx.end(), 0);
|
||||||
|
for (size_t i = 0; i < X_.size(); i++)
|
||||||
|
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
|
||||||
|
{
|
||||||
|
if (X_[i1] == X_[i2]) return y_[i1] < y_[i2];
|
||||||
|
else
|
||||||
|
return X_[i1] < X_[i2];
|
||||||
|
});
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
13
CPPFImdlp.h
13
CPPFImdlp.h
@@ -3,10 +3,12 @@
|
|||||||
#include "typesFImdlp.h"
|
#include "typesFImdlp.h"
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <tuple>
|
||||||
|
#include <string>
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class CPPFImdlp {
|
class CPPFImdlp {
|
||||||
protected:
|
protected:
|
||||||
int proposal;
|
int algorithm;
|
||||||
indices_t indices; // sorted indices to use with X and y
|
indices_t indices; // sorted indices to use with X and y
|
||||||
samples_t X;
|
samples_t X;
|
||||||
labels_t y;
|
labels_t y;
|
||||||
@@ -14,17 +16,22 @@ namespace mdlp {
|
|||||||
cutPoints_t cutPoints;
|
cutPoints_t cutPoints;
|
||||||
|
|
||||||
static indices_t sortIndices(samples_t&);
|
static indices_t sortIndices(samples_t&);
|
||||||
|
static indices_t sortIndices2(samples_t&, labels_t&);
|
||||||
void computeCutPoints(size_t, size_t);
|
void computeCutPoints(size_t, size_t);
|
||||||
bool mdlp(size_t, size_t, size_t);
|
bool mdlp(size_t, size_t, size_t);
|
||||||
long int getCandidate(size_t, size_t);
|
size_t getCandidate(size_t, size_t);
|
||||||
|
size_t getCandidateWeka(size_t, size_t);
|
||||||
void computeCutPointsAlternative(size_t, size_t);
|
void computeCutPointsAlternative(size_t, size_t);
|
||||||
void computeCutPointsProposal();
|
void computeCutPointsProposal(size_t, size_t);
|
||||||
|
precision_t value_cut_point(size_t, size_t);
|
||||||
|
tuple<precision_t, size_t> value_proposal_cut_point(size_t, size_t, size_t);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CPPFImdlp(int);
|
CPPFImdlp(int);
|
||||||
~CPPFImdlp();
|
~CPPFImdlp();
|
||||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||||
samples_t getCutPoints();
|
samples_t getCutPoints();
|
||||||
|
inline string version() { return "0.8.1"; };
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
@@ -39,7 +39,7 @@ namespace mdlp {
|
|||||||
for (auto count : counts) {
|
for (auto count : counts) {
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
p = (precision_t)count / nElements;
|
p = (precision_t)count / nElements;
|
||||||
ventropy -= p * log2(p);
|
ventropy -= p * log(p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
entropyCache[make_tuple(start, end)] = ventropy;
|
entropyCache[make_tuple(start, end)] = ventropy;
|
||||||
|
152
feature0
Normal file
152
feature0
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
+++++++++++++++++++++++
|
||||||
|
( 0, 13) -> (4.3, 0)
|
||||||
|
( 1, 8) -> (4.4, 0)
|
||||||
|
( 2, 38) -> (4.4, 0)
|
||||||
|
( 3, 42) -> (4.4, 0)
|
||||||
|
( 4, 41) -> (4.5, 0)
|
||||||
|
( 5, 3) -> (4.6, 0)
|
||||||
|
( 6, 6) -> (4.6, 0)
|
||||||
|
( 7, 22) -> (4.6, 0)
|
||||||
|
( 8, 47) -> (4.6, 0)
|
||||||
|
( 9, 2) -> (4.7, 0)
|
||||||
|
( 10, 29) -> (4.7, 0)
|
||||||
|
( 11, 11) -> (4.8, 0)
|
||||||
|
( 12, 12) -> (4.8, 0)
|
||||||
|
( 13, 24) -> (4.8, 0)
|
||||||
|
( 14, 30) -> (4.8, 0)
|
||||||
|
( 15, 45) -> (4.8, 0)
|
||||||
|
( 16, 1) -> (4.9, 0)
|
||||||
|
( 17, 9) -> (4.9, 0)
|
||||||
|
( 18, 34) -> (4.9, 0)
|
||||||
|
( 19, 37) -> (4.9, 0)
|
||||||
|
( 20, 57) -> (4.9, 1) candidate Total Entropy: 0.633 E. left: 0.000 E. right: 0.855 = 0.539 (0, 54) No
|
||||||
|
( 21, 106) -> (4.9, 2)
|
||||||
|
( 22, 4) -> (5.0, 0)
|
||||||
|
( 23, 7) -> (5.0, 0)
|
||||||
|
( 24, 25) -> (5.0, 0)
|
||||||
|
( 25, 26) -> (5.0, 0)
|
||||||
|
( 26, 35) -> (5.0, 0)
|
||||||
|
( 27, 40) -> (5.0, 0)
|
||||||
|
( 28, 43) -> (5.0, 0)
|
||||||
|
( 29, 49) -> (5.0, 0)
|
||||||
|
( 30, 60) -> (5.0, 1)
|
||||||
|
( 31, 93) -> (5.0, 1)
|
||||||
|
( 32, 0) -> (5.1, 0)
|
||||||
|
( 33, 17) -> (5.1, 0)
|
||||||
|
( 34, 19) -> (5.1, 0)
|
||||||
|
( 35, 21) -> (5.1, 0)
|
||||||
|
( 36, 23) -> (5.1, 0)
|
||||||
|
( 37, 39) -> (5.1, 0)
|
||||||
|
( 38, 44) -> (5.1, 0)
|
||||||
|
( 39, 46) -> (5.1, 0)
|
||||||
|
( 40, 98) -> (5.1, 1)
|
||||||
|
( 41, 27) -> (5.2, 0)
|
||||||
|
( 42, 28) -> (5.2, 0)
|
||||||
|
( 43, 32) -> (5.2, 0)
|
||||||
|
( 44, 59) -> (5.2, 1)
|
||||||
|
( 45, 48) -> (5.3, 0)
|
||||||
|
( 46, 5) -> (5.4, 0)
|
||||||
|
( 47, 10) -> (5.4, 0)
|
||||||
|
( 48, 16) -> (5.4, 0)
|
||||||
|
( 49, 20) -> (5.4, 0)
|
||||||
|
( 50, 31) -> (5.4, 0)
|
||||||
|
( 51, 84) -> (5.4, 1)
|
||||||
|
( 52, 33) -> (5.5, 0)
|
||||||
|
( 53, 36) -> (5.5, 0)
|
||||||
|
( 54, 53) -> (5.5, 1) 1st cut Total Entropy: 1.585 E. left: 0.633 E. right: 1.167 = 0.975 (0, 150) Sí => 5.450
|
||||||
|
( 55, 80) -> (5.5, 1)
|
||||||
|
( 56, 81) -> (5.5, 1)
|
||||||
|
( 57, 89) -> (5.5, 1)
|
||||||
|
( 58, 90) -> (5.5, 1)
|
||||||
|
( 59, 64) -> (5.6, 1)
|
||||||
|
( 60, 66) -> (5.6, 1)
|
||||||
|
( 61, 69) -> (5.6, 1)
|
||||||
|
( 62, 88) -> (5.6, 1)
|
||||||
|
( 63, 94) -> (5.6, 1)
|
||||||
|
( 64, 121) -> (5.6, 2) Candidate Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 77) No
|
||||||
|
( 65, 15) -> (5.7, 0)
|
||||||
|
( 66, 18) -> (5.7, 0)
|
||||||
|
( 67, 55) -> (5.7, 1)
|
||||||
|
( 68, 79) -> (5.7, 1)
|
||||||
|
( 69, 95) -> (5.7, 1)
|
||||||
|
( 70, 96) -> (5.7, 1)
|
||||||
|
( 71, 99) -> (5.7, 1)
|
||||||
|
( 72, 113) -> (5.7, 2)
|
||||||
|
( 73, 14) -> (5.8, 0)
|
||||||
|
( 74, 67) -> (5.8, 1)
|
||||||
|
( 75, 82) -> (5.8, 1)
|
||||||
|
( 76, 92) -> (5.8, 1)
|
||||||
|
( 77, 101) -> (5.8, 2) 2nd cut Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 150) Sí => 5.750
|
||||||
|
( 78, 114) -> (5.8, 2)
|
||||||
|
( 79, 142) -> (5.8, 2)
|
||||||
|
( 80, 61) -> (5.9, 1)
|
||||||
|
( 81, 70) -> (5.9, 1)
|
||||||
|
( 82, 149) -> (5.9, 2)
|
||||||
|
( 83, 62) -> (6.0, 1)
|
||||||
|
( 84, 78) -> (6.0, 1)
|
||||||
|
( 85, 83) -> (6.0, 1)
|
||||||
|
( 86, 85) -> (6.0, 1)
|
||||||
|
( 87, 119) -> (6.0, 2)
|
||||||
|
( 88, 138) -> (6.0, 2)
|
||||||
|
( 89, 63) -> (6.1, 1)
|
||||||
|
( 90, 71) -> (6.1, 1)
|
||||||
|
( 91, 73) -> (6.1, 1)
|
||||||
|
( 92, 91) -> (6.1, 1)
|
||||||
|
( 93, 127) -> (6.1, 2)
|
||||||
|
( 94, 134) -> (6.1, 2)
|
||||||
|
( 95, 68) -> (6.2, 1)
|
||||||
|
( 96, 97) -> (6.2, 1)
|
||||||
|
( 97, 126) -> (6.2, 2)
|
||||||
|
( 98, 148) -> (6.2, 2)
|
||||||
|
( 99, 56) -> (6.3, 1)
|
||||||
|
(100, 72) -> (6.3, 1)
|
||||||
|
(101, 87) -> (6.3, 1)
|
||||||
|
(102, 100) -> (6.3, 2)
|
||||||
|
(103, 103) -> (6.3, 2)
|
||||||
|
(104, 123) -> (6.3, 2)
|
||||||
|
(105, 133) -> (6.3, 2)
|
||||||
|
(106, 136) -> (6.3, 2)
|
||||||
|
(107, 146) -> (6.3, 2)
|
||||||
|
(108, 51) -> (6.4, 1)
|
||||||
|
(109, 74) -> (6.4, 1)
|
||||||
|
(110, 111) -> (6.4, 2)
|
||||||
|
(111, 115) -> (6.4, 2)
|
||||||
|
(112, 128) -> (6.4, 2)
|
||||||
|
(113, 132) -> (6.4, 2)
|
||||||
|
(114, 137) -> (6.4, 2)
|
||||||
|
(115, 54) -> (6.5, 1)
|
||||||
|
(116, 104) -> (6.5, 2)
|
||||||
|
(117, 110) -> (6.5, 2)
|
||||||
|
(118, 116) -> (6.5, 2)
|
||||||
|
(119, 147) -> (6.5, 2)
|
||||||
|
(120, 58) -> (6.6, 1)
|
||||||
|
(121, 75) -> (6.6, 1)
|
||||||
|
(122, 65) -> (6.7, 1)
|
||||||
|
(123, 77) -> (6.7, 1)
|
||||||
|
(124, 86) -> (6.7, 1)
|
||||||
|
(125, 108) -> (6.7, 2)
|
||||||
|
(126, 124) -> (6.7, 2)
|
||||||
|
(127, 140) -> (6.7, 2)
|
||||||
|
(128, 144) -> (6.7, 2)
|
||||||
|
(129, 145) -> (6.7, 2)
|
||||||
|
(130, 76) -> (6.8, 1)
|
||||||
|
(131, 112) -> (6.8, 2)
|
||||||
|
(132, 143) -> (6.8, 2)
|
||||||
|
(133, 52) -> (6.9, 1)
|
||||||
|
(134, 120) -> (6.9, 2)
|
||||||
|
(135, 139) -> (6.9, 2)
|
||||||
|
(136, 141) -> (6.9, 2)
|
||||||
|
(137, 50) -> (7.0, 1)
|
||||||
|
(138, 102) -> (7.1, 2) candidate Total Entropy: 0.939 E. left: 0.984 E. right: 0.000 = 0.822 (77, 150) No
|
||||||
|
(139, 109) -> (7.2, 2)
|
||||||
|
(140, 125) -> (7.2, 2)
|
||||||
|
(141, 129) -> (7.2, 2)
|
||||||
|
(142, 107) -> (7.3, 2)
|
||||||
|
(143, 130) -> (7.4, 2)
|
||||||
|
(144, 105) -> (7.6, 2)
|
||||||
|
(145, 117) -> (7.7, 2)
|
||||||
|
(146, 118) -> (7.7, 2)
|
||||||
|
(147, 122) -> (7.7, 2)
|
||||||
|
(148, 135) -> (7.7, 2)
|
||||||
|
(149, 131) -> (7.9, 2)
|
||||||
|
+++++++++++++++++++++++
|
@@ -4,3 +4,4 @@ project(main)
|
|||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
|
|
||||||
add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||||
|
add_executable(test test.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||||
|
@@ -5,6 +5,7 @@
|
|||||||
#include "../CPPFImdlp.h"
|
#include "../CPPFImdlp.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
using namespace mdlp;
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
@@ -33,8 +34,8 @@ int main(int argc, char** argv)
|
|||||||
cout << "Class name: " << file.getClassName() << endl;
|
cout << "Class name: " << file.getClassName() << endl;
|
||||||
cout << "Class type: " << file.getClassType() << endl;
|
cout << "Class type: " << file.getClassType() << endl;
|
||||||
cout << "Data: " << endl;
|
cout << "Data: " << endl;
|
||||||
vector<vector<float>>& X = file.getX();
|
vector<samples_t>& X = file.getX();
|
||||||
vector<int>& y = file.getY();
|
labels_t& y = file.getY();
|
||||||
for (int i = 0; i < 50; i++) {
|
for (int i = 0; i < 50; i++) {
|
||||||
for (auto feature : X) {
|
for (auto feature : X) {
|
||||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||||
|
95
sample/test.cpp
Normal file
95
sample/test.cpp
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
#include "ArffFiles.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <iomanip>
|
||||||
|
#include "../CPPFImdlp.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace mdlp;
|
||||||
|
|
||||||
|
tuple<precision_t, size_t> getCutPoint(samples_t& X, labels_t& y, size_t start, size_t cut, size_t end)
|
||||||
|
{
|
||||||
|
size_t idxPrev = cut - 1;
|
||||||
|
precision_t previous, next, actual;
|
||||||
|
previous = X[idxPrev];
|
||||||
|
next = actual = X[cut];
|
||||||
|
// definition 2 of the paper => X[t-1] < X[t]
|
||||||
|
while (idxPrev-- > start && actual == previous) {
|
||||||
|
previous = X[idxPrev];
|
||||||
|
}
|
||||||
|
// get the last equal value of X in the interval
|
||||||
|
while (actual == X[cut++] && cut < end);
|
||||||
|
if (previous == actual && cut < end)
|
||||||
|
actual = X[cut];
|
||||||
|
cut--;
|
||||||
|
return make_tuple((previous + actual) / 2, cut);
|
||||||
|
}
|
||||||
|
|
||||||
|
void show_points(samples_t& X, labels_t& y, size_t start, size_t end)
|
||||||
|
{
|
||||||
|
cout << "Interval: " << start << " - " << end << endl;
|
||||||
|
tuple<precision_t, size_t> cutPoint;
|
||||||
|
size_t cut = start + 1;
|
||||||
|
if (start >= end) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
while (y[cut - 1] == y[cut] && cut < end)
|
||||||
|
cut++;
|
||||||
|
if (cut != end) {
|
||||||
|
cutPoint = getCutPoint(X, y, start, cut, end);
|
||||||
|
cout << cut << ": " << fixed << setprecision(1) << X[cut] << " " << y[cut] << endl;
|
||||||
|
cout << "Cut point: " << get<0>(cutPoint) << " at " << get<1>(cutPoint) << endl;
|
||||||
|
show_points(X, y, start, get<1>(cutPoint));
|
||||||
|
show_points(X, y, get<1>(cutPoint), end);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
vector<string> lines;
|
||||||
|
string path = "../tests/";
|
||||||
|
map<string, bool > datasets = {
|
||||||
|
{"01", true},
|
||||||
|
{"02", true},
|
||||||
|
{"03", true},
|
||||||
|
{"04", true}
|
||||||
|
};
|
||||||
|
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||||
|
cout << "Usage: " << argv[0] << " {01, 02, 03, 04}" << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||||
|
auto attributes = file.getAttributes();
|
||||||
|
int items = file.getSize();
|
||||||
|
cout << "Number of lines: " << items << endl;
|
||||||
|
cout << "Attributes: " << endl;
|
||||||
|
for (auto attribute : attributes) {
|
||||||
|
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||||
|
}
|
||||||
|
cout << "Class name: " << file.getClassName() << endl;
|
||||||
|
cout << "Class type: " << file.getClassType() << endl;
|
||||||
|
cout << "Data: " << endl;
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
labels_t& y = file.getY();
|
||||||
|
for (int i = 0; i < y.size(); i++) {
|
||||||
|
for (auto feature : X) {
|
||||||
|
cout << i << ": " << fixed << setprecision(1) << feature[i] << " ";
|
||||||
|
}
|
||||||
|
cout << y[i] << endl;
|
||||||
|
}
|
||||||
|
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||||
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
|
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||||
|
cout << "--------------------------" << setprecision(3) << endl;
|
||||||
|
test.fit(X[i], y);
|
||||||
|
for (auto item : test.getCutPoints()) {
|
||||||
|
cout << item << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cout << "Function test" << endl;
|
||||||
|
show_points(X[0], y, 0, items);
|
||||||
|
return 0;
|
||||||
|
}
|
35
sample/tests/01.arff
Executable file
35
sample/tests/01.arff
Executable file
@@ -0,0 +1,35 @@
|
|||||||
|
% .
|
||||||
|
|
||||||
|
@RELATION 01
|
||||||
|
|
||||||
|
@ATTRIBUTE X REAL
|
||||||
|
@ATTRIBUTE class {0,1,2}
|
||||||
|
|
||||||
|
@DATA
|
||||||
|
1, 0
|
||||||
|
1, 0
|
||||||
|
1, 0
|
||||||
|
1, 0
|
||||||
|
1, 0
|
||||||
|
1, 0
|
||||||
|
1, 0
|
||||||
|
2, 0
|
||||||
|
2, 0
|
||||||
|
2, 0
|
||||||
|
2, 1
|
||||||
|
2, 2
|
||||||
|
2, 2
|
||||||
|
2, 2
|
||||||
|
2, 2
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 1
|
||||||
|
3, 1
|
||||||
|
3, 1
|
||||||
|
3, 2
|
||||||
|
3, 2
|
||||||
|
4, 0
|
||||||
|
4, 1
|
25
sample/tests/02.arff
Executable file
25
sample/tests/02.arff
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
% .
|
||||||
|
|
||||||
|
@RELATION 01
|
||||||
|
|
||||||
|
@ATTRIBUTE X REAL
|
||||||
|
@ATTRIBUTE class {0,1,2}
|
||||||
|
|
||||||
|
@DATA
|
||||||
|
2, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 1
|
||||||
|
3, 1
|
||||||
|
3, 1
|
||||||
|
3, 2
|
||||||
|
3, 2
|
||||||
|
4, 0
|
||||||
|
4, 1
|
||||||
|
4, 1
|
||||||
|
4, 1
|
||||||
|
4, 1
|
||||||
|
4, 1
|
24
sample/tests/03.arff
Executable file
24
sample/tests/03.arff
Executable file
@@ -0,0 +1,24 @@
|
|||||||
|
% .
|
||||||
|
|
||||||
|
@RELATION 01
|
||||||
|
|
||||||
|
@ATTRIBUTE X REAL
|
||||||
|
@ATTRIBUTE class {0,1,2}
|
||||||
|
|
||||||
|
@DATA
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 0
|
||||||
|
3, 1
|
||||||
|
3, 1
|
||||||
|
3, 1
|
||||||
|
3, 2
|
||||||
|
3, 2
|
||||||
|
4, 0
|
||||||
|
4, 1
|
||||||
|
4, 1
|
||||||
|
4, 1
|
||||||
|
4, 1
|
||||||
|
4, 1
|
@@ -15,12 +15,12 @@ namespace mdlp {
|
|||||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||||
X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
|
X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
|
||||||
y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
||||||
proposal = false;
|
algorithm = false;
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
void setProposal(bool value) {
|
void setalgorithm(bool value) {
|
||||||
proposal = value;
|
algorithm = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// void initIndices()
|
// void initIndices()
|
||||||
@@ -80,7 +80,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, TestDataset) {
|
TEST_F(TestFImdlp, TestDataset) {
|
||||||
proposal = false;
|
algorithm = false;
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPointsOriginal(0, 10);
|
computeCutPointsOriginal(0, 10);
|
||||||
cutPoints_t expected = {5.6499996185302734};
|
cutPoints_t expected = {5.6499996185302734};
|
||||||
@@ -95,14 +95,14 @@ namespace mdlp {
|
|||||||
|
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsOriginal) {
|
TEST_F(TestFImdlp, ComputeCutPointsOriginal) {
|
||||||
cutPoints_t expected = {5.65};
|
cutPoints_t expected = {5.65};
|
||||||
proposal = false;
|
algorithm = false;
|
||||||
computeCutPointsOriginal(0, 10);
|
computeCutPointsOriginal(0, 10);
|
||||||
checkCutPoints(expected);
|
checkCutPoints(expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) {
|
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) {
|
||||||
cutPoints_t expected;
|
cutPoints_t expected;
|
||||||
proposal = false;
|
algorithm = false;
|
||||||
expected = {2};
|
expected = {2};
|
||||||
samples_t X_ = {0, 1, 2, 2};
|
samples_t X_ = {0, 1, 2, 2};
|
||||||
labels_t y_ = {1, 1, 1, 2};
|
labels_t y_ = {1, 1, 1, 2};
|
||||||
@@ -110,19 +110,19 @@ namespace mdlp {
|
|||||||
checkCutPoints(expected);
|
checkCutPoints(expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsProposal) {
|
TEST_F(TestFImdlp, ComputeCutPointsalgorithm) {
|
||||||
proposal = true;
|
algorithm = true;
|
||||||
cutPoints_t expected;
|
cutPoints_t expected;
|
||||||
expected = {};
|
expected = {};
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPointsProposal();
|
computeCutPointsalgorithm();
|
||||||
checkCutPoints(expected);
|
checkCutPoints(expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) {
|
TEST_F(TestFImdlp, ComputeCutPointsalgorithmGCase) {
|
||||||
cutPoints_t expected;
|
cutPoints_t expected;
|
||||||
expected = {1.5};
|
expected = {1.5};
|
||||||
proposal = true;
|
algorithm = true;
|
||||||
samples_t X_ = {0, 1, 2, 2};
|
samples_t X_ = {0, 1, 2, 2};
|
||||||
labels_t y_ = {1, 1, 1, 2};
|
labels_t y_ = {1, 1, 1, 2};
|
||||||
fit(X_, y_);
|
fit(X_, y_);
|
||||||
@@ -131,7 +131,7 @@ namespace mdlp {
|
|||||||
|
|
||||||
TEST_F(TestFImdlp, GetCutPoints) {
|
TEST_F(TestFImdlp, GetCutPoints) {
|
||||||
samples_t computed, expected = {5.65};
|
samples_t computed, expected = {5.65};
|
||||||
proposal = false;
|
algorithm = false;
|
||||||
computeCutPointsOriginal(0, 10);
|
computeCutPointsOriginal(0, 10);
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
for (auto item: cutPoints)
|
for (auto item: cutPoints)
|
||||||
|
Reference in New Issue
Block a user