Merge pull request #3 from rmontanana/refactorValueCutPoint

- Had to deal with duplicated values in the variables and each practical case.
- Removed unused alternative versions of the algorithm.
- Now, intervals with the same value of the variable are not taken into account for cutpoints.
- Intervals must contain more than two examples to be evaluated.
- Added some more datasets to the sample.
This commit is contained in:
Ricardo Montañana Gómez
2023-02-21 19:13:33 +01:00
committed by GitHub
17 changed files with 739 additions and 189 deletions

26
.github/workflows/build.yml vendored Normal file
View File

@@ -0,0 +1,26 @@
name: Build
on:
push:
branches:
- main
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
- uses: sonarsource/sonarqube-scan-action@master
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
# If you wish to fail your job when the Quality Gate is red, uncomment the
# following lines. This would typically be used to fail a deployment.
# - uses: sonarsource/sonarqube-quality-gate-action@master
# timeout-minutes: 5
# env:
# SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

1
.gitignore vendored
View File

@@ -35,3 +35,4 @@
.idea .idea
cmake-* cmake-*
**/CMakeFiles **/CMakeFiles
sonar-project.properties

22
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,22 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "(lldb) Launch",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceRoot}/sample/build/sample",
"args": [
"mfeat-factors"
],
"stopAtEntry": false,
"cwd": "${workspaceRoot}/sample/build/",
"environment": [],
"externalConsole": false,
"MIMode": "lldb"
}
]
}

5
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"sonarlint.connectedMode.project": {
"projectKey": "rmontanana_mdlp_AYZkjILJHyjW-meBaElG"
}
}

29
.vscode/tasks.json vendored Normal file
View File

@@ -0,0 +1,29 @@
{
"tasks": [
{
"type": "cppbuild",
"label": "C/C++: clang++ build active file",
"command": "/usr/bin/clang++",
"args": [
"-fcolor-diagnostics",
"-fansi-escape-codes",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
}
],
"version": "2.0.0"
}

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(mdlp) project(mdlp)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 11)
add_library(mdlp CPPFImdlp.cpp Metrics.cpp) add_library(mdlp CPPFImdlp.cpp Metrics.cpp)

View File

@@ -4,12 +4,15 @@
#include <cmath> #include <cmath>
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include "Metrics.h" #include "Metrics.h"
namespace mdlp { namespace mdlp {
CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
CPPFImdlp::CPPFImdlp(): indices(indices_t()), X(samples_t()), y(labels_t()),
metrics(Metrics(y, indices))
{ {
} }
CPPFImdlp::~CPPFImdlp() CPPFImdlp::~CPPFImdlp() = default;
= default;
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_) CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
{ {
X = X_; X = X_;
@@ -18,113 +21,82 @@ namespace mdlp {
if (X.size() != y.size()) { if (X.size() != y.size()) {
throw invalid_argument("X and y must have the same size"); throw invalid_argument("X and y must have the same size");
} }
if (X.size() == 0 || y.size() == 0) { if (X.empty() || y.empty()) {
throw invalid_argument("X and y must have at least one element"); throw invalid_argument("X and y must have at least one element");
} }
indices = sortIndices(X_, y_); indices = sortIndices(X_, y_);
metrics.setData(y, indices); metrics.setData(y, indices);
switch (algorithm) {
case 0:
computeCutPoints(0, X.size()); computeCutPoints(0, X.size());
break;
case 1:
computeCutPointsAlternative(0, X.size());
break;
case 2:
indices = sortIndices1(X_);
metrics.setData(y, indices);
computeCutPointsClassic(0, X.size());
break;
default:
throw invalid_argument("algorithm must be 0, 1 or 2");
}
return *this; return *this;
} }
precision_t CPPFImdlp::halfWayValueCutPoint(size_t start, size_t idx)
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
{ {
size_t idxPrev = idx - 1; size_t n, m, idxPrev = cut - 1 >= start ? cut - 1 : cut;
precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]]; size_t idxNext = cut + 1 < end ? cut + 1 : cut;
// definition 2 of the paper => X[t-1] < X[t] bool backWall; // true if duplicates reach beginning of the interval
while (idxPrev-- > start && actual == previous) { precision_t previous, actual, next;
previous = X[indices[idxPrev]]; if (cut - 1 < start || cut + 1 >= end)
} throw logic_error("Invalid cutpoint index");
return (previous + actual) / 2;
}
tuple<precision_t, size_t> CPPFImdlp::completeValueCutPoint(size_t start, size_t cut, size_t end)
{
size_t idxPrev = cut - 1;
precision_t previous, actual;
previous = X[indices[idxPrev]]; previous = X[indices[idxPrev]];
actual = X[indices[cut]]; actual = X[indices[cut]];
next = X[indices[idxNext]];
// definition 2 of the paper => X[t-1] < X[t] // definition 2 of the paper => X[t-1] < X[t]
while (idxPrev-- > start && actual == previous) { // get the first equal value of X in the interval
previous = X[indices[idxPrev]]; while (idxPrev > start && actual == previous) {
previous = X[indices[--idxPrev]];
} }
backWall = idxPrev == start && actual == previous;
// get the last equal value of X in the interval // get the last equal value of X in the interval
while (actual == X[indices[cut++]] && cut < end); while (idxNext < end - 1 && actual == next) {
if (previous == actual && cut < end) next = X[indices[++idxNext]];
actual = X[indices[cut]];
cut--;
return make_tuple((previous + actual) / 2, cut);
} }
// # of duplicates before cutpoint
n = cut - 1 - idxPrev;
// # of duplicates after cutpoint
m = idxNext - cut - 1;
// Decide which values to use
cut = cut + (backWall ? m + 1 : -n);
actual = X[indices[cut]];
return { (actual + previous) / 2, cut };
}
void CPPFImdlp::computeCutPoints(size_t start, size_t end) void CPPFImdlp::computeCutPoints(size_t start, size_t end)
{ {
size_t cut; size_t cut;
tuple<precision_t, size_t> result; pair<precision_t, size_t> result;
if (end - start < 2) if (end - start < 3)
return; return;
cut = getCandidate(start, end); cut = getCandidate(start, end);
if (cut == numeric_limits<size_t>::max()) if (cut == numeric_limits<size_t>::max())
return; return;
if (mdlp(start, cut, end)) { if (mdlp(start, cut, end)) {
result = completeValueCutPoint(start, cut, end); result = valueCutPoint(start, cut, end);
cut = get<1>(result); cut = result.second;
cutPoints.push_back(get<0>(result)); cutPoints.push_back(result.first);
computeCutPoints(start, cut); computeCutPoints(start, cut);
computeCutPoints(cut, end); computeCutPoints(cut, end);
} }
} }
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
{
size_t cut;
if (end - start < 2)
return;
cut = getCandidate(start, end);
if (cut == numeric_limits<size_t>::max())
return;
if (mdlp(start, cut, end)) {
cutPoints.push_back(halfWayValueCutPoint(start, cut));
computeCutPointsAlternative(start, cut);
computeCutPointsAlternative(cut, end);
}
}
void CPPFImdlp::computeCutPointsClassic(size_t start, size_t end)
{
size_t cut;
cut = getCandidate(start, end);
if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
// cut.value == -1 means that there is no candidate in the interval
// No boundary found, so we add both ends of the interval as cutpoints
// because they were selected by the algorithm before
if (start == end)
return;
if (start != 0)
cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
if (end != X.size())
cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
return;
}
computeCutPoints(start, cut);
computeCutPoints(cut, end);
}
size_t CPPFImdlp::getCandidate(size_t start, size_t end) size_t CPPFImdlp::getCandidate(size_t start, size_t end)
{ {
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
E(A, TA; S) is minimal amogst all the candidate cut points. */ E(A, TA; S) is minimal amongst all the candidate cut points. */
size_t candidate = numeric_limits<size_t>::max(), elements = end - start; size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
bool sameValues = true;
precision_t entropy_left, entropy_right, minEntropy; precision_t entropy_left, entropy_right, minEntropy;
// Check if all the values of the variable in the interval are the same
for (size_t idx = start + 1; idx < end; idx++) {
if (X[indices[idx]] != X[indices[start]]) {
sameValues = false;
break;
}
}
if (sameValues)
return candidate;
minEntropy = metrics.entropy(start, end); minEntropy = metrics.entropy(start, end);
for (auto idx = start + 1; idx < end; idx++) { for (size_t idx = start + 1; idx < end; idx++) {
// Cutpoints are always on boundaries (definition 2) // Cutpoints are always on boundaries (definition 2)
if (y[indices[idx]] == y[indices[idx - 1]]) if (y[indices[idx]] == y[indices[idx - 1]])
continue; continue;
@@ -137,6 +109,7 @@ namespace mdlp {
} }
return candidate; return candidate;
} }
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
{ {
int k, k1, k2; int k, k1, k2;
@@ -158,32 +131,22 @@ namespace mdlp {
precision_t term = 1 / N * (log2(N - 1) + delta); precision_t term = 1 / N * (log2(N - 1) + delta);
return ig > term; return ig > term;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_) indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
{ {
indices_t idx(X_.size()); indices_t idx(X_.size());
iota(idx.begin(), idx.end(), 0); iota(idx.begin(), idx.end(), 0);
for (size_t i = 0; i < X_.size(); i++) for (size_t i = 0; i < X_.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
{ if (X_[i1] == X_[i2])
if (X_[i1] == X_[i2]) return y_[i1] < y_[i2]; return y_[i1] < y_[i2];
else else
return X_[i1] < X_[i2]; return X_[i1] < X_[i2];
}); });
return idx; return idx;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices1(samples_t& X_)
{
indices_t idx(X_.size());
iota(idx.begin(), idx.end(), 0);
for (size_t i = 0; i < X_.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
{
return X_[i1] < X_[i2];
});
return idx;
}
cutPoints_t CPPFImdlp::getCutPoints() cutPoints_t CPPFImdlp::getCutPoints()
{ {
// Remove duplicates and sort // Remove duplicates and sort

View File

@@ -3,12 +3,10 @@
#include "typesFImdlp.h" #include "typesFImdlp.h"
#include "Metrics.h" #include "Metrics.h"
#include <utility> #include <utility>
#include <tuple>
#include <string> #include <string>
namespace mdlp { namespace mdlp {
class CPPFImdlp { class CPPFImdlp {
protected: protected:
int algorithm;
indices_t indices; indices_t indices;
samples_t X; samples_t X;
labels_t y; labels_t y;
@@ -16,20 +14,16 @@ namespace mdlp {
cutPoints_t cutPoints; cutPoints_t cutPoints;
static indices_t sortIndices(samples_t&, labels_t&); static indices_t sortIndices(samples_t&, labels_t&);
static indices_t sortIndices1(samples_t&);
void computeCutPoints(size_t, size_t); void computeCutPoints(size_t, size_t);
void computeCutPointsAlternative(size_t, size_t);
void computeCutPointsClassic(size_t, size_t);
bool mdlp(size_t, size_t, size_t); bool mdlp(size_t, size_t, size_t);
size_t getCandidate(size_t, size_t); size_t getCandidate(size_t, size_t);
precision_t halfWayValueCutPoint(size_t, size_t); pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
tuple<precision_t, size_t> completeValueCutPoint(size_t, size_t, size_t);
public: public:
CPPFImdlp(int algorithm = 0); CPPFImdlp();
~CPPFImdlp(); ~CPPFImdlp();
CPPFImdlp& fit(samples_t&, labels_t&); CPPFImdlp& fit(samples_t&, labels_t&);
samples_t getCutPoints(); samples_t getCutPoints();
inline string version() { return "1.0.0"; }; inline string version() { return "1.1.0"; };
}; };
} }
#endif #endif

View File

@@ -29,8 +29,8 @@ namespace mdlp {
labels_t counts(numClasses + 1, 0); labels_t counts(numClasses + 1, 0);
if (end - start < 2) if (end - start < 2)
return 0; return 0;
if (entropyCache.find(make_tuple(start, end)) != entropyCache.end()) { if (entropyCache.find({ start, end }) != entropyCache.end()) {
return entropyCache[make_tuple(start, end)]; return entropyCache[{start, end}];
} }
for (auto i = &indices[start]; i != &indices[end]; ++i) { for (auto i = &indices[start]; i != &indices[end]; ++i) {
counts[y[*i]]++; counts[y[*i]]++;
@@ -42,7 +42,7 @@ namespace mdlp {
ventropy -= p * log2(p); ventropy -= p * log2(p);
} }
} }
entropyCache[make_tuple(start, end)] = ventropy; entropyCache[{start, end}] = ventropy;
return ventropy; return ventropy;
} }
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end) precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(main) project(main)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 11)
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)

View File

@@ -7,19 +7,21 @@
using namespace std; using namespace std;
using namespace mdlp; using namespace mdlp;
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
ArffFiles file; ArffFiles file;
vector<string> lines;
string path = "../../tests/datasets/"; string path = "../../tests/datasets/";
map<string, bool> datasets = { map<string, bool> datasets = {
{"mfeat-factors", true}, {"mfeat-factors", true},
{"iris", true}, {"iris", true},
{"letter", true}, {"letter", true},
{"kdd_JapaneseVowels", false} {"glass", true},
{"kdd_JapaneseVowels", false},
{"test", true}
}; };
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) { if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl; cout << "Usage: " << argv[0] << " {mfeat-factors, glass, iris, letter, kdd_JapaneseVowels, test}" << endl;
return 1; return 1;
} }
@@ -42,9 +44,11 @@ int main(int argc, char** argv)
} }
cout << y[i] << endl; cout << y[i] << endl;
} }
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0); mdlp::CPPFImdlp test = mdlp::CPPFImdlp();
for (auto i = 0; i < attributes.size(); i++) { for (auto i = 0; i < attributes.size(); i++) {
auto min_max = minmax_element(X[i].begin(), X[i].end());
cout << "Cut points for " << get<0>(attributes[i]) << endl; cout << "Cut points for " << get<0>(attributes[i]) << endl;
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
cout << "--------------------------" << setprecision(3) << endl; cout << "--------------------------" << setprecision(3) << endl;
test.fit(X[i], y); test.fit(X[i], y);
for (auto item : test.getCutPoints()) { for (auto item : test.getCutPoints()) {

View File

@@ -17,7 +17,7 @@ unsigned long int ArffFiles::getSize()
{ {
return lines.size(); return lines.size();
} }
vector<tuple<string, string>> ArffFiles::getAttributes() vector<pair<string, string>> ArffFiles::getAttributes()
{ {
return attributes; return attributes;
} }
@@ -50,7 +50,7 @@ void ArffFiles::load(string fileName, bool classLast)
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line); stringstream ss(line);
ss >> keyword >> attribute >> type; ss >> keyword >> attribute >> type;
attributes.push_back(make_tuple(attribute, type)); attributes.push_back({ attribute, type });
continue; continue;
} }
if (line[0] == '@') { if (line[0] == '@') {

View File

@@ -2,12 +2,11 @@
#define ARFFFILES_H #define ARFFFILES_H
#include <string> #include <string>
#include <vector> #include <vector>
#include <tuple>
using namespace std; using namespace std;
class ArffFiles { class ArffFiles {
private: private:
vector<string> lines; vector<string> lines;
vector<tuple<string, string>> attributes; vector<pair<string, string>> attributes;
string className, classType; string className, classType;
vector<vector<float>> X; vector<vector<float>> X;
vector<int> y; vector<int> y;
@@ -22,7 +21,7 @@ public:
string trim(const string&); string trim(const string&);
vector<vector<float>>& getX(); vector<vector<float>>& getX();
vector<int>& getY(); vector<int>& getY();
vector<tuple<string, string>> getAttributes(); vector<pair<string, string>> getAttributes();
vector<int> factorize(const vector<string>& labels_t); vector<int> factorize(const vector<string>& labels_t);
}; };
#endif #endif

View File

@@ -13,18 +13,13 @@ namespace mdlp {
{ {
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
algorithm = false;
fit(X, y); fit(X, y);
} }
void setalgorithm(bool value)
{
algorithm = value;
}
void checkSortedVector() void checkSortedVector()
{ {
indices_t testSortedIndices = sortIndices(X, y); indices_t testSortedIndices = sortIndices(X, y);
precision_t prev = X[testSortedIndices[0]]; precision_t prev = X[testSortedIndices[0]];
for (auto i = 0; i < X.size(); ++i) { for (unsigned long i = 0; i < X.size(); ++i) {
EXPECT_EQ(testSortedIndices[i], indices[i]); EXPECT_EQ(testSortedIndices[i], indices[i]);
EXPECT_LE(prev, X[testSortedIndices[i]]); EXPECT_LE(prev, X[testSortedIndices[i]]);
prev = X[testSortedIndices[i]]; prev = X[testSortedIndices[i]];
@@ -34,7 +29,7 @@ namespace mdlp {
{ {
int expectedSize = expected.size(); int expectedSize = expected.size();
EXPECT_EQ(cutPoints.size(), expectedSize); EXPECT_EQ(cutPoints.size(), expectedSize);
for (auto i = 0; i < cutPoints.size(); i++) { for (unsigned long i = 0; i < cutPoints.size(); i++) {
EXPECT_NEAR(cutPoints[i], expected[i], precision); EXPECT_NEAR(cutPoints[i], expected[i], precision);
} }
} }
@@ -47,6 +42,19 @@ namespace mdlp {
EXPECT_NEAR(expected[i], computed[i], precision); EXPECT_NEAR(expected[i], computed[i], precision);
} }
} }
bool test_result(samples_t& X_, size_t cut, float midPoint, size_t limit, string title)
{
pair<precision_t, size_t> result;
labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
X = X_;
y = y_;
indices = sortIndices(X, y);
cout << "* " << title << endl;
result = valueCutPoint(0, cut, 10);
EXPECT_NEAR(result.first, midPoint, precision);
EXPECT_EQ(result.second, limit);
return true;
}
}; };
TEST_F(TestFImdlp, FitErrorEmptyDataset) TEST_F(TestFImdlp, FitErrorEmptyDataset)
{ {
@@ -54,11 +62,6 @@ namespace mdlp {
y = labels_t(); y = labels_t();
EXPECT_THROW(fit(X, y), std::invalid_argument); EXPECT_THROW(fit(X, y), std::invalid_argument);
} }
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
{
algorithm = 2;
EXPECT_THROW(fit(X, y), std::invalid_argument);
}
TEST_F(TestFImdlp, FitErrorDifferentSize) TEST_F(TestFImdlp, FitErrorDifferentSize)
{ {
X = { 1, 2, 3 }; X = { 1, 2, 3 };
@@ -83,31 +86,41 @@ namespace mdlp {
y = { 2, 2, 1 }; y = { 2, 2, 1 };
indices = { 1, 2, 0 }; indices = { 1, 2, 0 };
} }
TEST_F(TestFImdlp, TestArtificialDatasetAlternative) TEST_F(TestFImdlp, TestShortDatasets)
{ {
algorithm = 1; vector<precision_t> computed;
X = { 1 };
y = { 1 };
fit(X, y); fit(X, y);
computeCutPoints(0, 20);
cutPoints_t expected = { 5.0500001907348633 };
vector<precision_t> computed = getCutPoints();
computed = getCutPoints(); computed = getCutPoints();
int expectedSize = expected.size(); EXPECT_EQ(computed.size(), 0);
EXPECT_EQ(computed.size(), expected.size()); X = { 1, 3 };
for (auto i = 0; i < computed.size(); i++) { y = { 1, 2 };
EXPECT_NEAR(computed[i], expected[i], precision); fit(X, y);
} computed = getCutPoints();
EXPECT_EQ(computed.size(), 0);
X = { 2, 4 };
y = { 1, 2 };
fit(X, y);
computed = getCutPoints();
EXPECT_EQ(computed.size(), 0);
X = { 1, 2, 3 };
y = { 1, 2, 2 };
fit(X, y);
computed = getCutPoints();
EXPECT_EQ(computed.size(), 1);
EXPECT_NEAR(computed[0], 1.5, precision);
} }
TEST_F(TestFImdlp, TestArtificialDataset) TEST_F(TestFImdlp, TestArtificialDataset)
{ {
algorithm = 0;
fit(X, y); fit(X, y);
computeCutPoints(0, 20); computeCutPoints(0, 20);
cutPoints_t expected = { 5.0500001907348633 }; cutPoints_t expected = { 5.05 };
vector<precision_t> computed = getCutPoints(); vector<precision_t> computed = getCutPoints();
computed = getCutPoints(); computed = getCutPoints();
int expectedSize = expected.size(); int expectedSize = expected.size();
EXPECT_EQ(computed.size(), expected.size()); EXPECT_EQ(computed.size(), expected.size());
for (auto i = 0; i < computed.size(); i++) { for (unsigned long i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[i], precision); EXPECT_NEAR(computed[i], expected[i], precision);
} }
} }
@@ -116,44 +129,17 @@ namespace mdlp {
ArffFiles file; ArffFiles file;
string path = "../datasets/"; string path = "../datasets/";
file.load(path + "iris.arff", true);
int items = file.getSize();
vector<samples_t>& X = file.getX();
vector<cutPoints_t> expected = {
{ 5.4499998092651367, 6.25 },
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
};
labels_t& y = file.getY();
auto attributes = file.getAttributes();
algorithm = 0;
for (auto feature = 0; feature < attributes.size(); feature++) {
fit(X[feature], y);
vector<precision_t> computed = getCutPoints();
EXPECT_EQ(computed.size(), expected[feature].size());
for (auto i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[feature][i], precision);
}
}
}
TEST_F(TestFImdlp, TestIrisAlternative)
{
ArffFiles file;
string path = "../datasets/";
file.load(path + "iris.arff", true); file.load(path + "iris.arff", true);
int items = file.getSize(); int items = file.getSize();
vector<samples_t>& X = file.getX(); vector<samples_t>& X = file.getX();
vector<cutPoints_t> expected = { vector<cutPoints_t> expected = {
{ 5.4499998092651367, 5.75 }, { 5.4499998092651367, 5.75 },
{ 2.8499999046325684, 3.3499999046325684 }, { 2.75, 2.85, 2.95, 3.05, 3.35 },
{ 2.4500000476837158, 4.75 }, { 2.4500000476837158, 4.75, 5.0500001907348633 },
{ 0.80000001192092896, 1.75 } { 0.80000001192092896, 1.75 }
}; };
labels_t& y = file.getY(); labels_t& y = file.getY();
auto attributes = file.getAttributes(); auto attributes = file.getAttributes();
algorithm = 1;
for (auto feature = 0; feature < attributes.size(); feature++) { for (auto feature = 0; feature < attributes.size(); feature++) {
fit(X[feature], y); fit(X[feature], y);
vector<precision_t> computed = getCutPoints(); vector<precision_t> computed = getCutPoints();
@@ -166,21 +152,30 @@ namespace mdlp {
TEST_F(TestFImdlp, ComputeCutPointsGCase) TEST_F(TestFImdlp, ComputeCutPointsGCase)
{ {
cutPoints_t expected; cutPoints_t expected;
algorithm = 0;
expected = { 1.5 }; expected = { 1.5 };
samples_t X_ = { 0, 1, 2, 2 }; samples_t X_ = { 0, 1, 2, 2, 2 };
labels_t y_ = { 1, 1, 1, 2 }; labels_t y_ = { 1, 1, 1, 2, 2 };
fit(X_, y_); fit(X_, y_);
checkCutPoints(expected); checkCutPoints(expected);
} }
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase) TEST_F(TestFImdlp, ValueCutPoint)
{ {
cutPoints_t expected; // Case titles as stated in the doc
expected = { 1.5 }; samples_t X1a{ 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 };
algorithm = true; test_result(X1a, 6, 7.3 / 2, 6, "1a");
samples_t X_ = { 0, 1, 2, 2 }; samples_t X2a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
labels_t y_ = { 1, 1, 1, 2 }; test_result(X2a, 6, 7.1 / 2, 4, "2a");
fit(X_, y_); samples_t X2b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
checkCutPoints(expected); test_result(X2b, 6, 7.5 / 2, 7, "2b");
samples_t X3a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
test_result(X3a, 4, 7.1 / 2, 4, "3a");
samples_t X3b = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
test_result(X3b, 4, 7.1 / 2, 4, "3b");
samples_t X4a = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.9, 4.0 };
test_result(X4a, 4, 6.9 / 2, 2, "4a");
samples_t X4b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
test_result(X4b, 4, 7.5 / 2, 7, "4b");
samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
test_result(X4c, 4, 6.9 / 2, 2, "4c");
} }
} }

332
tests/datasets/glass.arff Executable file
View File

@@ -0,0 +1,332 @@
% 1. Title: Glass Identification Database
%
% 2. Sources:
% (a) Creator: B. German
% -- Central Research Establishment
% Home Office Forensic Science Service
% Aldermaston, Reading, Berkshire RG7 4PN
% (b) Donor: Vina Spiehler, Ph.D., DABFT
% Diagnostic Products Corporation
% (213) 776-0180 (ext 3014)
% (c) Date: September, 1987
%
% 3. Past Usage:
% -- Rule Induction in Forensic Science
% -- Ian W. Evett and Ernest J. Spiehler
% -- Central Research Establishment
% Home Office Forensic Science Service
% Aldermaston, Reading, Berkshire RG7 4PN
% -- Unknown technical note number (sorry, not listed here)
% -- General Results: nearest neighbor held its own with respect to the
% rule-based system
%
% 4. Relevant Information:
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
% Suite 206; Marina del Rey, CA 90292 (213) 827-7890 and FAX: -3189.
% In determining whether the glass was a type of "float" glass or not,
% the following results were obtained (# incorrect answers):
%
% Type of Sample Beagle NN DA
% Windows that were float processed (87) 10 12 21
% Windows that were not: (76) 19 16 22
%
% The study of classification of types of glass was motivated by
% criminological investigation. At the scene of the crime, the glass left
% can be used as evidence...if it is correctly identified!
%
% 5. Number of Instances: 214
%
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
% -- all attributes are continuously valued
%
% 7. Attribute Information:
% 1. Id number: 1 to 214
% 2. RI: refractive index
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
% are attributes 4-10)
% 4. Mg: Magnesium
% 5. Al: Aluminum
% 6. Si: Silicon
% 7. K: Potassium
% 8. Ca: Calcium
% 9. Ba: Barium
% 10. Fe: Iron
% 11. Type of glass: (class attribute)
% -- 1 building_windows_float_processed
% -- 2 building_windows_non_float_processed
% -- 3 vehicle_windows_float_processed
% -- 4 vehicle_windows_non_float_processed (none in this database)
% -- 5 containers
% -- 6 tableware
% -- 7 headlamps
%
% 8. Missing Attribute Values: None
%
% Summary Statistics:
% Attribute: Min Max Mean SD Correlation with class
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
%
% 9. Class Distribution: (out of 214 total instances)
% -- 163 Window glass (building windows and vehicle windows)
% -- 87 float processed
% -- 70 building windows
% -- 17 vehicle windows
% -- 76 non-float processed
% -- 76 building windows
% -- 0 vehicle windows
% -- 51 Non-window glass
% -- 13 containers
% -- 9 tableware
% -- 29 headlamps
%
%
%
%
%
%
%
% Relabeled values in attribute 'Type'
% From: '1' To: 'build wind float'
% From: '2' To: 'build wind non-float'
% From: '3' To: 'vehic wind float'
% From: '4' To: 'vehic wind non-float'
% From: '5' To: containers
% From: '6' To: tableware
% From: '7' To: headlamps
%
@relation Glass
@attribute 'RI' real
@attribute 'Na' real
@attribute 'Mg' real
@attribute 'Al' real
@attribute 'Si' real
@attribute 'K' real
@attribute 'Ca' real
@attribute 'Ba' real
@attribute 'Fe' real
@attribute 'Type' { 'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
@data
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware

180
tests/datasets/test.arff Executable file
View File

@@ -0,0 +1,180 @@
% 1. Title: Test Feature extracted from Glass
%
@RELATION test
@ATTRIBUTE Mg REAL
@ATTRIBUTE Type {0,1,2,3,4,5,6}
@DATA
3.5,0
3.52,1
1.74,2
0.0,3
2.85,3
3.65,1
2.84,0
0.0,4
3.9,3
3.58,3
3.25,3
3.76,1
3.45,1
3.48,3
0.0,4
0.0,5
2.96,3
3.65,0
0.0,3
3.74,3
3.66,0
1.61,5
3.49,3
3.52,3
3.54,3
3.53,1
3.56,3
3.34,1
0.0,2
3.61,3
3.6,0
3.46,0
2.72,3
3.51,3
3.09,3
3.48,0
3.8,3
3.58,3
3.54,1
3.42,0
2.68,5
3.49,0
3.68,3
3.6,0
3.59,3
0.0,4
3.54,0
2.2,4
3.59,0
3.66,1
3.87,3
3.45,0
3.82,0
3.72,0
3.33,0
3.78,1
2.24,2
3.83,3
0.78,2
3.9,3
3.58,0
3.57,3
3.52,0
3.47,0
3.48,0
3.66,0
3.62,0
3.39,0
0.0,3
3.55,0
1.01,3
0.0,4
3.43,0
3.58,0
0.0,4
3.74,0
0.0,4
3.44,3
3.97,3
3.6,0
3.64,3
1.83,4
0.0,4
0.0,5
0.0,4
0.0,5
0.0,4
3.73,0
3.58,1
3.34,4
2.09,3
2.71,0
3.18,3
3.43,0
3.15,3
3.56,0
0.0,4
0.0,4
4.49,0
3.59,0
3.56,3
3.52,3
0.0,4
0.0,5
3.61,0
3.74,0
3.62,3
3.84,0
3.67,0
3.58,0
0.0,5
3.66,3
3.68,3
2.28,3
3.67,3
3.2,4
3.81,0
0.0,3
3.39,1
3.57,0
1.85,5
3.75,0
3.76,3
0.0,3
3.86,0
0.0,4
0.0,5
0.0,3
3.5,0
3.67,3
3.55,3
0.0,4
3.61,3
3.41,3
3.7,0
0.0,4
3.58,0
1.78,4
3.85,0
3.48,0
1.71,5
0.0,2
3.5,3
3.49,3
3.36,1
0.0,3
3.54,0
0.0,4
2.87,0
3.57,1
3.48,0
2.81,0
0.0,4
0.0,2
3.62,3
3.58,3
3.45,3
3.48,3
3.54,3
3.54,3
3.37,0
3.41,1
3.48,0
3.43,3
3.5,0
0.0,4
3.54,0
3.52,3
3.52,3
2.88,3
2.19,2

View File

@@ -11,7 +11,7 @@ namespace mdlp {
typedef vector<int> labels_t; typedef vector<int> labels_t;
typedef vector<size_t> indices_t; typedef vector<size_t> indices_t;
typedef vector<precision_t> cutPoints_t; typedef vector<precision_t> cutPoints_t;
typedef map<tuple<int, int>, precision_t> cacheEnt_t; typedef map<pair<int, int>, precision_t> cacheEnt_t;
typedef map<tuple<int, int, int>, precision_t> cacheIg_t; typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
} }
#endif #endif