mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 23:45:57 +00:00
Refactor testing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -34,3 +34,4 @@
|
||||
**/lcoverage
|
||||
.idea
|
||||
cmake-*
|
||||
**/CMakeFiles
|
||||
|
209
CPPFImdlp.cpp
209
CPPFImdlp.cpp
@@ -4,9 +4,10 @@
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
// NOTE: remove this
|
||||
#include <iostream>
|
||||
namespace mdlp {
|
||||
CPPFImdlp::CPPFImdlp(int proposal):proposal(proposal), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
||||
CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
||||
{
|
||||
}
|
||||
CPPFImdlp::~CPPFImdlp()
|
||||
@@ -23,14 +24,14 @@ namespace mdlp {
|
||||
if (X.size() == 0 || y.size() == 0) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
indices = sortIndices(X_);
|
||||
indices = sortIndices2(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
switch (proposal) {
|
||||
switch (algorithm) {
|
||||
case 0:
|
||||
computeCutPoints(0, X.size());
|
||||
break;
|
||||
case 1:
|
||||
computeCutPointsProposal();
|
||||
computeCutPointsProposal(0, X.size());
|
||||
break;
|
||||
case 2:
|
||||
computeCutPointsAlternative(0, X.size());
|
||||
@@ -38,78 +39,169 @@ namespace mdlp {
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
/**
 * Compute the numeric cut value associated with sorted position idx.
 *
 * The result is the midpoint between the sample at sorted position idx and
 * the closest earlier sample (walking back no further than start) whose
 * value differs from it, following definition 2 of the paper
 * (X[t-1] < X[t]). If every earlier sample down to start holds the same
 * value, the midpoint degenerates to that value.
 *
 * @param start first sorted position of the interval being examined
 * @param idx   sorted position of the candidate cut; the code reads position
 *              idx - 1, so idx is presumably always greater than start
 * @return midpoint between the two selected sample values
 */
precision_t CPPFImdlp::value_cut_point(size_t start, size_t idx)
{
    const precision_t current = X[indices[idx]];
    size_t pos = idx - 1;
    precision_t lower = X[indices[pos]];
    // Step backwards while the value is a duplicate of the current one,
    // never moving before the start of the interval.
    while (pos > start && lower == current) {
        --pos;
        lower = X[indices[pos]];
    }
    return (lower + current) / 2;
}
|
||||
/**
 * Compute the cut value and the adjusted cut position for the "proposal"
 * variant of the algorithm.
 *
 * Starting from the sample at sorted position cut, the function
 *  1. walks backwards (not past start) to find the nearest earlier sample
 *     with a different value (definition 2 of the paper: X[t-1] < X[t]);
 *  2. walks forwards (stopping at end) past the samples that share the
 *     value found at cut;
 *  3. returns the midpoint of the two selected values together with the
 *     position where the forward walk stopped, minus one.
 *
 * @param start first sorted position of the interval
 * @param cut   sorted position of the candidate cut; the code reads
 *              positions cut - 1 and cut
 * @param end   one past the last sorted position of the interval
 * @return tuple of (cut value, adjusted cut position)
 */
tuple<precision_t, size_t> CPPFImdlp::value_proposal_cut_point(size_t start, size_t cut, size_t end)
{
    const precision_t pivot = X[indices[cut]];
    size_t pos = cut - 1;
    precision_t lower = X[indices[pos]];
    // Backward scan: skip duplicates of the pivot value.
    while (pos > start && lower == pivot) {
        --pos;
        lower = X[indices[pos]];
    }
    // Forward scan: advance over the run of values equal to the pivot. The
    // position is advanced even on the mismatching element, which is why it
    // is decremented again before returning.
    bool same;
    do {
        same = (pivot == X[indices[cut]]);
        ++cut;
    } while (same && cut < end);
    precision_t upper = pivot;
    // No distinct smaller value was found: use the value at the position
    // where the forward scan stopped instead of the pivot.
    if (lower == pivot && cut < end)
        upper = X[indices[cut]];
    --cut;
    return make_tuple((lower + upper) / 2, cut);
}
|
||||
// void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||
// {
|
||||
// size_t cut;
|
||||
// if (end - start < 2)
|
||||
// return;
|
||||
// cut = getCandidate(start, end);
|
||||
// if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
|
||||
// // cut == max means that there is no candidate in the interval
|
||||
// // No boundary found, so we add both ends of the interval as cutpoints
|
||||
// // because they were selected by the algorithm before
|
||||
// if (start != 0)
|
||||
// cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
|
||||
// if (end != X.size())
|
||||
// cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
|
||||
// //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
// return;
|
||||
// }
|
||||
// // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
// computeCutPoints(start, cut);
|
||||
// computeCutPoints(cut, end);
|
||||
// }
|
||||
// void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||
// {
|
||||
// size_t cut;
|
||||
// if (end - start < 2)
|
||||
// return;
|
||||
// cut = getCandidate(start, end);
|
||||
// if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
|
||||
// // cut == max means that there is no candidate in the interval
|
||||
// // No boundary found, so we add both ends of the interval as cutpoints
|
||||
// // because they were selected by the algorithm before
|
||||
// if (start != 0)
|
||||
// cutPoints.push_back(value_cut_point(0, start));
|
||||
// if (end != X.size())
|
||||
// cutPoints.push_back(value_cut_point(start, end));
|
||||
// //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
// return;
|
||||
// }
|
||||
// // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
// computeCutPointsAlternative(start, cut);
|
||||
// computeCutPointsAlternative(cut, end);
|
||||
// }
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||
{
|
||||
int cut;
|
||||
size_t cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == -1 || !mdlp(start, cut, end)) {
|
||||
// cut.value == -1 means that there is no candidate in the interval
|
||||
// No boundary found, so we add both ends of the interval as cutpoints
|
||||
// because they were selected by the algorithm before
|
||||
if (start != 0)
|
||||
cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
|
||||
if (end != X.size())
|
||||
cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
cutPoints.push_back(value_cut_point(start, cut));
|
||||
//cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
}
|
||||
computeCutPoints(start, cut);
|
||||
computeCutPoints(cut, end);
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||
{
|
||||
precision_t cut;
|
||||
size_t cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == -1)
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
cutPoints.push_back((X[indices[cut]] + X[indices[cut - 1]]) / 2);
|
||||
cutPoints.push_back(value_cut_point(start, cut));
|
||||
//cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
computeCutPointsAlternative(start, cut);
|
||||
computeCutPointsAlternative(cut, end);
|
||||
}
|
||||
computeCutPointsAlternative(start, cut);
|
||||
computeCutPointsAlternative(cut, end);
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsProposal()
|
||||
// void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||
// {
|
||||
// size_t cut;
|
||||
// if (end - start < 2)
|
||||
// return;
|
||||
// cut = getCandidateWeka(start, end);
|
||||
// if (cut == numeric_limits<size_t>::max())
|
||||
// return;
|
||||
// if (mdlp(start, cut, end)) {
|
||||
// cutPoints.push_back(value_cut_point(start, cut));
|
||||
// //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
// }
|
||||
// computeCutPointsAlternative(start, cut);
|
||||
// computeCutPointsAlternative(cut, end);
|
||||
// }
|
||||
void CPPFImdlp::computeCutPointsProposal(size_t start, size_t end)
|
||||
{
|
||||
precision_t xPrev, xCur, xPivot, cutPoint;
|
||||
int yPrev, yCur, yPivot;
|
||||
size_t idx, numElements, start;
|
||||
size_t cut;
|
||||
tuple<precision_t, size_t> result;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
//cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
result = value_proposal_cut_point(start, cut, end);
|
||||
cut = get<1>(result);
|
||||
cutPoints.push_back(get<0>(result));
|
||||
//cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
|
||||
computeCutPointsProposal(start, cut);
|
||||
computeCutPointsProposal(cut, end);
|
||||
}
|
||||
|
||||
xCur = xPrev = X[indices[0]];
|
||||
yCur = yPrev = y[indices[0]];
|
||||
numElements = indices.size() - 1;
|
||||
idx = start = 0;
|
||||
while (idx < numElements) {
|
||||
xPivot = xCur;
|
||||
yPivot = yCur;
|
||||
// Read the same values and check class changes
|
||||
do {
|
||||
idx++;
|
||||
xCur = X[indices[idx]];
|
||||
yCur = y[indices[idx]];
|
||||
if (yCur != yPivot && xCur == xPivot) {
|
||||
yPivot = -1;
|
||||
}
|
||||
size_t CPPFImdlp::getCandidateWeka(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||
precision_t entropy_left, entropy_right, minEntropy;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (auto idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (X[indices[idx - 1]] < X[indices[idx]]) {
|
||||
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
while (idx < numElements && xCur == xPivot);
|
||||
// Check if the class changed and there are more than 1 element
|
||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && mdlp(start, idx, indices.size())) {
|
||||
start = idx;
|
||||
cutPoint = (xPrev + xCur) / 2;
|
||||
cutPoints.push_back(cutPoint);
|
||||
}
|
||||
yPrev = yPivot;
|
||||
xPrev = xPivot;
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
long int CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
long int candidate = -1, elements = end - start;
|
||||
precision_t entropy_left, entropy_right, minEntropy = numeric_limits<precision_t>::max();
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||
precision_t entropy_left, entropy_right, minEntropy;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (auto idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||
@@ -137,9 +229,9 @@ namespace mdlp {
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = log2(pow(3, precision_t(k)) - 2) -
|
||||
delta = log(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
precision_t term = 1 / N * (log(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
cutPoints_t CPPFImdlp::getCutPoints()
|
||||
@@ -164,4 +256,17 @@ namespace mdlp {
|
||||
{ return X_[i1] < X_[i2]; });
|
||||
return idx;
|
||||
}
|
||||
/**
 * Build the permutation of positions 0..X_.size()-1 that orders the samples
 * by feature value, breaking ties between equal feature values by label.
 *
 * The previous version wrapped the sort in a loop that ran X_.size() times.
 * Re-sorting an already sorted range with a stable sort cannot change it, so
 * a single pass yields the same permutation without the redundant work.
 *
 * @param X_ feature values
 * @param y_ labels, indexed with the same positions as X_ (assumed to hold
 *           at least X_.size() elements — TODO confirm against callers)
 * @return positions sorted by (X_ value, y_ label), ascending
 */
indices_t CPPFImdlp::sortIndices2(samples_t& X_, labels_t& y_)
{
    indices_t idx(X_.size());
    iota(idx.begin(), idx.end(), 0);
    stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
        {
            // Equal feature values are ordered by their label.
            if (X_[i1] == X_[i2])
                return y_[i1] < y_[i2];
            return X_[i1] < X_[i2];
        });
    return idx;
}
|
||||
}
|
||||
|
13
CPPFImdlp.h
13
CPPFImdlp.h
@@ -3,10 +3,12 @@
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <utility>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
int proposal;
|
||||
int algorithm;
|
||||
indices_t indices; // sorted indices to use with X and y
|
||||
samples_t X;
|
||||
labels_t y;
|
||||
@@ -14,17 +16,22 @@ namespace mdlp {
|
||||
cutPoints_t cutPoints;
|
||||
|
||||
static indices_t sortIndices(samples_t&);
|
||||
static indices_t sortIndices2(samples_t&, labels_t&);
|
||||
void computeCutPoints(size_t, size_t);
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
long int getCandidate(size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
size_t getCandidateWeka(size_t, size_t);
|
||||
void computeCutPointsAlternative(size_t, size_t);
|
||||
void computeCutPointsProposal();
|
||||
void computeCutPointsProposal(size_t, size_t);
|
||||
precision_t value_cut_point(size_t, size_t);
|
||||
tuple<precision_t, size_t> value_proposal_cut_point(size_t, size_t, size_t);
|
||||
|
||||
public:
|
||||
CPPFImdlp(int);
|
||||
~CPPFImdlp();
|
||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||
samples_t getCutPoints();
|
||||
inline string version() { return "0.8.1"; };
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -39,7 +39,7 @@ namespace mdlp {
|
||||
for (auto count : counts) {
|
||||
if (count > 0) {
|
||||
p = (precision_t)count / nElements;
|
||||
ventropy -= p * log2(p);
|
||||
ventropy -= p * log(p);
|
||||
}
|
||||
}
|
||||
entropyCache[make_tuple(start, end)] = ventropy;
|
||||
|
152
feature0
Normal file
152
feature0
Normal file
@@ -0,0 +1,152 @@
|
||||
+++++++++++++++++++++++
|
||||
( 0, 13) -> (4.3, 0)
|
||||
( 1, 8) -> (4.4, 0)
|
||||
( 2, 38) -> (4.4, 0)
|
||||
( 3, 42) -> (4.4, 0)
|
||||
( 4, 41) -> (4.5, 0)
|
||||
( 5, 3) -> (4.6, 0)
|
||||
( 6, 6) -> (4.6, 0)
|
||||
( 7, 22) -> (4.6, 0)
|
||||
( 8, 47) -> (4.6, 0)
|
||||
( 9, 2) -> (4.7, 0)
|
||||
( 10, 29) -> (4.7, 0)
|
||||
( 11, 11) -> (4.8, 0)
|
||||
( 12, 12) -> (4.8, 0)
|
||||
( 13, 24) -> (4.8, 0)
|
||||
( 14, 30) -> (4.8, 0)
|
||||
( 15, 45) -> (4.8, 0)
|
||||
( 16, 1) -> (4.9, 0)
|
||||
( 17, 9) -> (4.9, 0)
|
||||
( 18, 34) -> (4.9, 0)
|
||||
( 19, 37) -> (4.9, 0)
|
||||
( 20, 57) -> (4.9, 1) candidate Total Entropy: 0.633 E. left: 0.000 E. right: 0.855 = 0.539 (0, 54) No
|
||||
( 21, 106) -> (4.9, 2)
|
||||
( 22, 4) -> (5.0, 0)
|
||||
( 23, 7) -> (5.0, 0)
|
||||
( 24, 25) -> (5.0, 0)
|
||||
( 25, 26) -> (5.0, 0)
|
||||
( 26, 35) -> (5.0, 0)
|
||||
( 27, 40) -> (5.0, 0)
|
||||
( 28, 43) -> (5.0, 0)
|
||||
( 29, 49) -> (5.0, 0)
|
||||
( 30, 60) -> (5.0, 1)
|
||||
( 31, 93) -> (5.0, 1)
|
||||
( 32, 0) -> (5.1, 0)
|
||||
( 33, 17) -> (5.1, 0)
|
||||
( 34, 19) -> (5.1, 0)
|
||||
( 35, 21) -> (5.1, 0)
|
||||
( 36, 23) -> (5.1, 0)
|
||||
( 37, 39) -> (5.1, 0)
|
||||
( 38, 44) -> (5.1, 0)
|
||||
( 39, 46) -> (5.1, 0)
|
||||
( 40, 98) -> (5.1, 1)
|
||||
( 41, 27) -> (5.2, 0)
|
||||
( 42, 28) -> (5.2, 0)
|
||||
( 43, 32) -> (5.2, 0)
|
||||
( 44, 59) -> (5.2, 1)
|
||||
( 45, 48) -> (5.3, 0)
|
||||
( 46, 5) -> (5.4, 0)
|
||||
( 47, 10) -> (5.4, 0)
|
||||
( 48, 16) -> (5.4, 0)
|
||||
( 49, 20) -> (5.4, 0)
|
||||
( 50, 31) -> (5.4, 0)
|
||||
( 51, 84) -> (5.4, 1)
|
||||
( 52, 33) -> (5.5, 0)
|
||||
( 53, 36) -> (5.5, 0)
|
||||
( 54, 53) -> (5.5, 1) 1st cut Total Entropy: 1.585 E. left: 0.633 E. right: 1.167 = 0.975 (0, 150) Sí => 5.450
|
||||
( 55, 80) -> (5.5, 1)
|
||||
( 56, 81) -> (5.5, 1)
|
||||
( 57, 89) -> (5.5, 1)
|
||||
( 58, 90) -> (5.5, 1)
|
||||
( 59, 64) -> (5.6, 1)
|
||||
( 60, 66) -> (5.6, 1)
|
||||
( 61, 69) -> (5.6, 1)
|
||||
( 62, 88) -> (5.6, 1)
|
||||
( 63, 94) -> (5.6, 1)
|
||||
( 64, 121) -> (5.6, 2) Candidate Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 77) No
|
||||
( 65, 15) -> (5.7, 0)
|
||||
( 66, 18) -> (5.7, 0)
|
||||
( 67, 55) -> (5.7, 1)
|
||||
( 68, 79) -> (5.7, 1)
|
||||
( 69, 95) -> (5.7, 1)
|
||||
( 70, 96) -> (5.7, 1)
|
||||
( 71, 99) -> (5.7, 1)
|
||||
( 72, 113) -> (5.7, 2)
|
||||
( 73, 14) -> (5.8, 0)
|
||||
( 74, 67) -> (5.8, 1)
|
||||
( 75, 82) -> (5.8, 1)
|
||||
( 76, 92) -> (5.8, 1)
|
||||
( 77, 101) -> (5.8, 2) 2nd cut Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 150) Sí => 5.750
|
||||
( 78, 114) -> (5.8, 2)
|
||||
( 79, 142) -> (5.8, 2)
|
||||
( 80, 61) -> (5.9, 1)
|
||||
( 81, 70) -> (5.9, 1)
|
||||
( 82, 149) -> (5.9, 2)
|
||||
( 83, 62) -> (6.0, 1)
|
||||
( 84, 78) -> (6.0, 1)
|
||||
( 85, 83) -> (6.0, 1)
|
||||
( 86, 85) -> (6.0, 1)
|
||||
( 87, 119) -> (6.0, 2)
|
||||
( 88, 138) -> (6.0, 2)
|
||||
( 89, 63) -> (6.1, 1)
|
||||
( 90, 71) -> (6.1, 1)
|
||||
( 91, 73) -> (6.1, 1)
|
||||
( 92, 91) -> (6.1, 1)
|
||||
( 93, 127) -> (6.1, 2)
|
||||
( 94, 134) -> (6.1, 2)
|
||||
( 95, 68) -> (6.2, 1)
|
||||
( 96, 97) -> (6.2, 1)
|
||||
( 97, 126) -> (6.2, 2)
|
||||
( 98, 148) -> (6.2, 2)
|
||||
( 99, 56) -> (6.3, 1)
|
||||
(100, 72) -> (6.3, 1)
|
||||
(101, 87) -> (6.3, 1)
|
||||
(102, 100) -> (6.3, 2)
|
||||
(103, 103) -> (6.3, 2)
|
||||
(104, 123) -> (6.3, 2)
|
||||
(105, 133) -> (6.3, 2)
|
||||
(106, 136) -> (6.3, 2)
|
||||
(107, 146) -> (6.3, 2)
|
||||
(108, 51) -> (6.4, 1)
|
||||
(109, 74) -> (6.4, 1)
|
||||
(110, 111) -> (6.4, 2)
|
||||
(111, 115) -> (6.4, 2)
|
||||
(112, 128) -> (6.4, 2)
|
||||
(113, 132) -> (6.4, 2)
|
||||
(114, 137) -> (6.4, 2)
|
||||
(115, 54) -> (6.5, 1)
|
||||
(116, 104) -> (6.5, 2)
|
||||
(117, 110) -> (6.5, 2)
|
||||
(118, 116) -> (6.5, 2)
|
||||
(119, 147) -> (6.5, 2)
|
||||
(120, 58) -> (6.6, 1)
|
||||
(121, 75) -> (6.6, 1)
|
||||
(122, 65) -> (6.7, 1)
|
||||
(123, 77) -> (6.7, 1)
|
||||
(124, 86) -> (6.7, 1)
|
||||
(125, 108) -> (6.7, 2)
|
||||
(126, 124) -> (6.7, 2)
|
||||
(127, 140) -> (6.7, 2)
|
||||
(128, 144) -> (6.7, 2)
|
||||
(129, 145) -> (6.7, 2)
|
||||
(130, 76) -> (6.8, 1)
|
||||
(131, 112) -> (6.8, 2)
|
||||
(132, 143) -> (6.8, 2)
|
||||
(133, 52) -> (6.9, 1)
|
||||
(134, 120) -> (6.9, 2)
|
||||
(135, 139) -> (6.9, 2)
|
||||
(136, 141) -> (6.9, 2)
|
||||
(137, 50) -> (7.0, 1)
|
||||
(138, 102) -> (7.1, 2) candidate Total Entropy: 0.939 E. left: 0.984 E. right: 0.000 = 0.822 (77, 150) No
|
||||
(139, 109) -> (7.2, 2)
|
||||
(140, 125) -> (7.2, 2)
|
||||
(141, 129) -> (7.2, 2)
|
||||
(142, 107) -> (7.3, 2)
|
||||
(143, 130) -> (7.4, 2)
|
||||
(144, 105) -> (7.6, 2)
|
||||
(145, 117) -> (7.7, 2)
|
||||
(146, 118) -> (7.7, 2)
|
||||
(147, 122) -> (7.7, 2)
|
||||
(148, 135) -> (7.7, 2)
|
||||
(149, 131) -> (7.9, 2)
|
||||
+++++++++++++++++++++++
|
@@ -4,3 +4,4 @@ project(main)
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||
add_executable(test test.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include "../CPPFImdlp.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mdlp;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
@@ -33,8 +34,8 @@ int main(int argc, char** argv)
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<vector<float>>& X = file.getX();
|
||||
vector<int>& y = file.getY();
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
|
95
sample/test.cpp
Normal file
95
sample/test.cpp
Normal file
@@ -0,0 +1,95 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "../CPPFImdlp.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mdlp;
|
||||
|
||||
/**
 * Stand-alone version of the proposal cut-point computation, operating
 * directly on X (no index indirection).
 *
 * Looks backwards from cut (not past start) for the nearest different value
 * (definition 2 of the paper: X[t-1] < X[t]), then forwards (stopping at
 * end) past the run of values equal to X[cut], and returns the midpoint of
 * the two selected values together with the position where the forward scan
 * stopped, minus one.
 *
 * @param X     sample values; presumably already sorted — verify at caller
 * @param y     labels (not used by this function)
 * @param start first position of the interval
 * @param cut   candidate cut position; positions cut - 1 and cut are read
 * @param end   one past the last position of the interval
 * @return tuple of (cut value, adjusted cut position)
 */
tuple<precision_t, size_t> getCutPoint(samples_t& X, labels_t& y, size_t start, size_t cut, size_t end)
{
    const precision_t pivot = X[cut];
    size_t pos = cut - 1;
    precision_t lower = X[pos];
    // Backward scan: skip duplicates of the pivot value.
    while (pos > start && lower == pivot) {
        --pos;
        lower = X[pos];
    }
    // Forward scan: advance over the run of values equal to the pivot; the
    // position is advanced even on the mismatching element and is stepped
    // back once before returning.
    bool same;
    do {
        same = (pivot == X[cut]);
        ++cut;
    } while (same && cut < end);
    precision_t upper = pivot;
    // No distinct smaller value was found: use the value at the position
    // where the forward scan stopped instead of the pivot.
    if (lower == pivot && cut < end)
        upper = X[cut];
    --cut;
    return make_tuple((lower + upper) / 2, cut);
}
|
||||
|
||||
/**
 * Recursively print the label-change cut points found in [start, end).
 *
 * Prints the interval, finds the first position in it whose label differs
 * from the previous one, asks getCutPoint for the cut value and adjusted
 * position, prints both, and recurses on the two sub-intervals split at the
 * adjusted position.
 *
 * @param X     sample values
 * @param y     labels, indexed with the same positions as X
 * @param start first position of the interval
 * @param end   one past the last position of the interval
 */
void show_points(samples_t& X, labels_t& y, size_t start, size_t end)
{
    cout << "Interval: " << start << " - " << end << endl;
    if (start >= end) {
        return;
    }
    size_t cut = start + 1;
    // The bounds test has to come first: with the operands in the opposite
    // order y[cut] was read with cut == end, which is past the last element
    // whenever end == y.size().
    while (cut < end && y[cut - 1] == y[cut])
        cut++;
    if (cut != end) {
        tuple<precision_t, size_t> cutPoint = getCutPoint(X, y, start, cut, end);
        cout << cut << ": " << fixed << setprecision(1) << X[cut] << " " << y[cut] << endl;
        cout << "Cut point: " << get<0>(cutPoint) << " at " << get<1>(cutPoint) << endl;
        show_points(X, y, start, get<1>(cutPoint));
        show_points(X, y, get<1>(cutPoint), end);
    }
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "../tests/";
|
||||
map<string, bool > datasets = {
|
||||
{"01", true},
|
||||
{"02", true},
|
||||
{"03", true},
|
||||
{"04", true}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {01, 02, 03, 04}" << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
for (int i = 0; i < y.size(); i++) {
|
||||
for (auto feature : X) {
|
||||
cout << i << ": " << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
}
|
||||
cout << "Function test" << endl;
|
||||
show_points(X[0], y, 0, items);
|
||||
return 0;
|
||||
}
|
35
sample/tests/01.arff
Executable file
35
sample/tests/01.arff
Executable file
@@ -0,0 +1,35 @@
|
||||
% .
|
||||
|
||||
@RELATION 01
|
||||
|
||||
@ATTRIBUTE X REAL
|
||||
@ATTRIBUTE class {0,1,2}
|
||||
|
||||
@DATA
|
||||
1, 0
|
||||
1, 0
|
||||
1, 0
|
||||
1, 0
|
||||
1, 0
|
||||
1, 0
|
||||
1, 0
|
||||
2, 0
|
||||
2, 0
|
||||
2, 0
|
||||
2, 1
|
||||
2, 2
|
||||
2, 2
|
||||
2, 2
|
||||
2, 2
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 1
|
||||
3, 1
|
||||
3, 1
|
||||
3, 2
|
||||
3, 2
|
||||
4, 0
|
||||
4, 1
|
25
sample/tests/02.arff
Executable file
25
sample/tests/02.arff
Executable file
@@ -0,0 +1,25 @@
|
||||
% .
|
||||
|
||||
@RELATION 01
|
||||
|
||||
@ATTRIBUTE X REAL
|
||||
@ATTRIBUTE class {0,1,2}
|
||||
|
||||
@DATA
|
||||
2, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 1
|
||||
3, 1
|
||||
3, 1
|
||||
3, 2
|
||||
3, 2
|
||||
4, 0
|
||||
4, 1
|
||||
4, 1
|
||||
4, 1
|
||||
4, 1
|
||||
4, 1
|
24
sample/tests/03.arff
Executable file
24
sample/tests/03.arff
Executable file
@@ -0,0 +1,24 @@
|
||||
% .
|
||||
|
||||
@RELATION 01
|
||||
|
||||
@ATTRIBUTE X REAL
|
||||
@ATTRIBUTE class {0,1,2}
|
||||
|
||||
@DATA
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 0
|
||||
3, 1
|
||||
3, 1
|
||||
3, 1
|
||||
3, 2
|
||||
3, 2
|
||||
4, 0
|
||||
4, 1
|
||||
4, 1
|
||||
4, 1
|
||||
4, 1
|
||||
4, 1
|
@@ -15,12 +15,12 @@ namespace mdlp {
|
||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||
X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
|
||||
y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
||||
proposal = false;
|
||||
algorithm = false;
|
||||
fit(X, y);
|
||||
}
|
||||
|
||||
void setProposal(bool value) {
|
||||
proposal = value;
|
||||
void setalgorithm(bool value) {
|
||||
algorithm = value;
|
||||
}
|
||||
|
||||
// void initIndices()
|
||||
@@ -80,7 +80,7 @@ namespace mdlp {
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, TestDataset) {
|
||||
proposal = false;
|
||||
algorithm = false;
|
||||
fit(X, y);
|
||||
computeCutPointsOriginal(0, 10);
|
||||
cutPoints_t expected = {5.6499996185302734};
|
||||
@@ -95,14 +95,14 @@ namespace mdlp {
|
||||
|
||||
TEST_F(TestFImdlp, ComputeCutPointsOriginal) {
|
||||
cutPoints_t expected = {5.65};
|
||||
proposal = false;
|
||||
algorithm = false;
|
||||
computeCutPointsOriginal(0, 10);
|
||||
checkCutPoints(expected);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) {
|
||||
cutPoints_t expected;
|
||||
proposal = false;
|
||||
algorithm = false;
|
||||
expected = {2};
|
||||
samples_t X_ = {0, 1, 2, 2};
|
||||
labels_t y_ = {1, 1, 1, 2};
|
||||
@@ -110,19 +110,19 @@ namespace mdlp {
|
||||
checkCutPoints(expected);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, ComputeCutPointsProposal) {
|
||||
proposal = true;
|
||||
TEST_F(TestFImdlp, ComputeCutPointsalgorithm) {
|
||||
algorithm = true;
|
||||
cutPoints_t expected;
|
||||
expected = {};
|
||||
fit(X, y);
|
||||
computeCutPointsProposal();
|
||||
computeCutPointsalgorithm();
|
||||
checkCutPoints(expected);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) {
|
||||
TEST_F(TestFImdlp, ComputeCutPointsalgorithmGCase) {
|
||||
cutPoints_t expected;
|
||||
expected = {1.5};
|
||||
proposal = true;
|
||||
algorithm = true;
|
||||
samples_t X_ = {0, 1, 2, 2};
|
||||
labels_t y_ = {1, 1, 1, 2};
|
||||
fit(X_, y_);
|
||||
@@ -131,7 +131,7 @@ namespace mdlp {
|
||||
|
||||
TEST_F(TestFImdlp, GetCutPoints) {
|
||||
samples_t computed, expected = {5.65};
|
||||
proposal = false;
|
||||
algorithm = false;
|
||||
computeCutPointsOriginal(0, 10);
|
||||
computed = getCutPoints();
|
||||
for (auto item: cutPoints)
|
||||
|
Reference in New Issue
Block a user