mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 15:35:55 +00:00
Merge pull request #3 from rmontanana/refactorValueCutPoint
- Handled duplicated values of a variable, covering each practical case. - Removed the unused alternative versions of the algorithm. - Intervals in which every example has the same value of the variable are no longer taken into account for cut points. - An interval must contain more than two examples to be evaluated. - Added some more datasets to the sample.
This commit is contained in:
26
.github/workflows/build.yml
vendored
Normal file
26
.github/workflows/build.yml
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||
- uses: sonarsource/sonarqube-scan-action@master
|
||||
env:
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
|
||||
# If you wish to fail your job when the Quality Gate is red, uncomment the
|
||||
# following lines. This would typically be used to fail a deployment.
|
||||
# - uses: sonarsource/sonarqube-quality-gate-action@master
|
||||
# timeout-minutes: 5
|
||||
# env:
|
||||
# SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -35,3 +35,4 @@
|
||||
.idea
|
||||
cmake-*
|
||||
**/CMakeFiles
|
||||
sonar-project.properties
|
||||
|
22
.vscode/launch.json
vendored
Normal file
22
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "(lldb) Launch",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/sample/build/sample",
|
||||
"args": [
|
||||
"mfeat-factors"
|
||||
],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceRoot}/sample/build/",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "lldb"
|
||||
}
|
||||
]
|
||||
}
|
5
.vscode/settings.json
vendored
Normal file
5
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"sonarlint.connectedMode.project": {
|
||||
"projectKey": "rmontanana_mdlp_AYZkjILJHyjW-meBaElG"
|
||||
}
|
||||
}
|
29
.vscode/tasks.json
vendored
Normal file
29
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"tasks": [
|
||||
{
|
||||
"type": "cppbuild",
|
||||
"label": "C/C++: clang++ build active file",
|
||||
"command": "/usr/bin/clang++",
|
||||
"args": [
|
||||
"-fcolor-diagnostics",
|
||||
"-fansi-escape-codes",
|
||||
"-g",
|
||||
"${file}",
|
||||
"-o",
|
||||
"${fileDirname}/${fileBasenameNoExtension}"
|
||||
],
|
||||
"options": {
|
||||
"cwd": "${fileDirname}"
|
||||
},
|
||||
"problemMatcher": [
|
||||
"$gcc"
|
||||
],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"detail": "Task generated by Debugger."
|
||||
}
|
||||
],
|
||||
"version": "2.0.0"
|
||||
}
|
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(mdlp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||
|
||||
|
149
CPPFImdlp.cpp
149
CPPFImdlp.cpp
@@ -4,12 +4,15 @@
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
||||
|
||||
CPPFImdlp::CPPFImdlp(): indices(indices_t()), X(samples_t()), y(labels_t()),
|
||||
metrics(Metrics(y, indices))
|
||||
{
|
||||
}
|
||||
CPPFImdlp::~CPPFImdlp()
|
||||
= default;
|
||||
CPPFImdlp::~CPPFImdlp() = default;
|
||||
|
||||
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
X = X_;
|
||||
@@ -18,113 +21,82 @@ namespace mdlp {
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
}
|
||||
if (X.size() == 0 || y.size() == 0) {
|
||||
if (X.empty() || y.empty()) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
indices = sortIndices(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
switch (algorithm) {
|
||||
case 0:
|
||||
computeCutPoints(0, X.size());
|
||||
break;
|
||||
case 1:
|
||||
computeCutPointsAlternative(0, X.size());
|
||||
break;
|
||||
case 2:
|
||||
indices = sortIndices1(X_);
|
||||
metrics.setData(y, indices);
|
||||
computeCutPointsClassic(0, X.size());
|
||||
break;
|
||||
default:
|
||||
throw invalid_argument("algorithm must be 0, 1 or 2");
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
precision_t CPPFImdlp::halfWayValueCutPoint(size_t start, size_t idx)
|
||||
|
||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t idxPrev = idx - 1;
|
||||
precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
}
|
||||
return (previous + actual) / 2;
|
||||
}
|
||||
tuple<precision_t, size_t> CPPFImdlp::completeValueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t idxPrev = cut - 1;
|
||||
precision_t previous, actual;
|
||||
size_t n, m, idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
||||
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
||||
bool backWall; // true if duplicates reach beginning of the interval
|
||||
precision_t previous, actual, next;
|
||||
if (cut - 1 < start || cut + 1 >= end)
|
||||
throw logic_error("Invalid cutpoint index");
|
||||
previous = X[indices[idxPrev]];
|
||||
actual = X[indices[cut]];
|
||||
next = X[indices[idxNext]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
// get the first equal value of X in the interval
|
||||
while (idxPrev > start && actual == previous) {
|
||||
previous = X[indices[--idxPrev]];
|
||||
}
|
||||
backWall = idxPrev == start && actual == previous;
|
||||
// get the last equal value of X in the interval
|
||||
while (actual == X[indices[cut++]] && cut < end);
|
||||
if (previous == actual && cut < end)
|
||||
actual = X[indices[cut]];
|
||||
cut--;
|
||||
return make_tuple((previous + actual) / 2, cut);
|
||||
while (idxNext < end - 1 && actual == next) {
|
||||
next = X[indices[++idxNext]];
|
||||
}
|
||||
// # of duplicates before cutpoint
|
||||
n = cut - 1 - idxPrev;
|
||||
// # of duplicates after cutpoint
|
||||
m = idxNext - cut - 1;
|
||||
// Decide which values to use
|
||||
cut = cut + (backWall ? m + 1 : -n);
|
||||
actual = X[indices[cut]];
|
||||
return { (actual + previous) / 2, cut };
|
||||
}
|
||||
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||
{
|
||||
size_t cut;
|
||||
tuple<precision_t, size_t> result;
|
||||
if (end - start < 2)
|
||||
pair<precision_t, size_t> result;
|
||||
if (end - start < 3)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = completeValueCutPoint(start, cut, end);
|
||||
cut = get<1>(result);
|
||||
cutPoints.push_back(get<0>(result));
|
||||
result = valueCutPoint(start, cut, end);
|
||||
cut = result.second;
|
||||
cutPoints.push_back(result.first);
|
||||
computeCutPoints(start, cut);
|
||||
computeCutPoints(cut, end);
|
||||
}
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||
{
|
||||
size_t cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
cutPoints.push_back(halfWayValueCutPoint(start, cut));
|
||||
computeCutPointsAlternative(start, cut);
|
||||
computeCutPointsAlternative(cut, end);
|
||||
}
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsClassic(size_t start, size_t end)
|
||||
{
|
||||
size_t cut;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
|
||||
// cut.value == -1 means that there is no candidate in the interval
|
||||
// No boundary found, so we add both ends of the interval as cutpoints
|
||||
// because they were selected by the algorithm before
|
||||
if (start == end)
|
||||
return;
|
||||
if (start != 0)
|
||||
cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
|
||||
if (end != X.size())
|
||||
cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
|
||||
return;
|
||||
}
|
||||
computeCutPoints(start, cut);
|
||||
computeCutPoints(cut, end);
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amogst all the candidate cut points. */
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||
bool sameValues = true;
|
||||
precision_t entropy_left, entropy_right, minEntropy;
|
||||
// Check if all the values of the variable in the interval are the same
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
if (X[indices[idx]] != X[indices[start]]) {
|
||||
sameValues = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sameValues)
|
||||
return candidate;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (auto idx = start + 1; idx < end; idx++) {
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
@@ -137,6 +109,7 @@ namespace mdlp {
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k, k1, k2;
|
||||
@@ -158,32 +131,22 @@ namespace mdlp {
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
for (size_t i = 0; i < X_.size(); i++)
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
|
||||
{
|
||||
if (X_[i1] == X_[i2]) return y_[i1] < y_[i2];
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||
if (X_[i1] == X_[i2])
|
||||
return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices1(samples_t& X_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
for (size_t i = 0; i < X_.size(); i++)
|
||||
stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
|
||||
{
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
cutPoints_t CPPFImdlp::getCutPoints()
|
||||
{
|
||||
// Remove duplicates and sort
|
||||
|
12
CPPFImdlp.h
12
CPPFImdlp.h
@@ -3,12 +3,10 @@
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <utility>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
int algorithm;
|
||||
indices_t indices;
|
||||
samples_t X;
|
||||
labels_t y;
|
||||
@@ -16,20 +14,16 @@ namespace mdlp {
|
||||
cutPoints_t cutPoints;
|
||||
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
static indices_t sortIndices1(samples_t&);
|
||||
void computeCutPoints(size_t, size_t);
|
||||
void computeCutPointsAlternative(size_t, size_t);
|
||||
void computeCutPointsClassic(size_t, size_t);
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
precision_t halfWayValueCutPoint(size_t, size_t);
|
||||
tuple<precision_t, size_t> completeValueCutPoint(size_t, size_t, size_t);
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
public:
|
||||
CPPFImdlp(int algorithm = 0);
|
||||
CPPFImdlp();
|
||||
~CPPFImdlp();
|
||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||
samples_t getCutPoints();
|
||||
inline string version() { return "1.0.0"; };
|
||||
inline string version() { return "1.1.0"; };
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -29,8 +29,8 @@ namespace mdlp {
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
if (end - start < 2)
|
||||
return 0;
|
||||
if (entropyCache.find(make_tuple(start, end)) != entropyCache.end()) {
|
||||
return entropyCache[make_tuple(start, end)];
|
||||
if (entropyCache.find({ start, end }) != entropyCache.end()) {
|
||||
return entropyCache[{start, end}];
|
||||
}
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
@@ -42,7 +42,7 @@ namespace mdlp {
|
||||
ventropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
entropyCache[make_tuple(start, end)] = ventropy;
|
||||
entropyCache[{start, end}] = ventropy;
|
||||
return ventropy;
|
||||
}
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
||||
|
@@ -1,6 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(main)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||
|
@@ -7,19 +7,21 @@
|
||||
using namespace std;
|
||||
using namespace mdlp;
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "../../tests/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
map<string, bool> datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
{"glass", true},
|
||||
{"kdd_JapaneseVowels", false},
|
||||
{"test", true}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, glass, iris, letter, kdd_JapaneseVowels, test}" << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -42,9 +44,11 @@ int main(int argc, char** argv)
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp();
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
|
@@ -17,7 +17,7 @@ unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||
vector<pair<string, string>> ArffFiles::getAttributes()
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
@@ -50,7 +50,7 @@ void ArffFiles::load(string fileName, bool classLast)
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
stringstream ss(line);
|
||||
ss >> keyword >> attribute >> type;
|
||||
attributes.push_back(make_tuple(attribute, type));
|
||||
attributes.push_back({ attribute, type });
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
|
@@ -2,12 +2,11 @@
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
class ArffFiles {
|
||||
private:
|
||||
vector<string> lines;
|
||||
vector<tuple<string, string>> attributes;
|
||||
vector<pair<string, string>> attributes;
|
||||
string className, classType;
|
||||
vector<vector<float>> X;
|
||||
vector<int> y;
|
||||
@@ -22,7 +21,7 @@ public:
|
||||
string trim(const string&);
|
||||
vector<vector<float>>& getX();
|
||||
vector<int>& getY();
|
||||
vector<tuple<string, string>> getAttributes();
|
||||
vector<pair<string, string>> getAttributes();
|
||||
vector<int> factorize(const vector<string>& labels_t);
|
||||
};
|
||||
#endif
|
@@ -13,18 +13,13 @@ namespace mdlp {
|
||||
{
|
||||
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
algorithm = false;
|
||||
fit(X, y);
|
||||
}
|
||||
void setalgorithm(bool value)
|
||||
{
|
||||
algorithm = value;
|
||||
}
|
||||
void checkSortedVector()
|
||||
{
|
||||
indices_t testSortedIndices = sortIndices(X, y);
|
||||
precision_t prev = X[testSortedIndices[0]];
|
||||
for (auto i = 0; i < X.size(); ++i) {
|
||||
for (unsigned long i = 0; i < X.size(); ++i) {
|
||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
prev = X[testSortedIndices[i]];
|
||||
@@ -34,7 +29,7 @@ namespace mdlp {
|
||||
{
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
for (auto i = 0; i < cutPoints.size(); i++) {
|
||||
for (unsigned long i = 0; i < cutPoints.size(); i++) {
|
||||
EXPECT_NEAR(cutPoints[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
@@ -47,6 +42,19 @@ namespace mdlp {
|
||||
EXPECT_NEAR(expected[i], computed[i], precision);
|
||||
}
|
||||
}
|
||||
// Helper for the ValueCutPoint cases: installs X_ together with ten distinct labels (0..9),
// re-sorts the indices, runs valueCutPoint(0, cut, 10) and checks the returned pair.
//   X_       - sample values, copied into the fixture's X
//   cut      - candidate cut index handed to valueCutPoint
//   midPoint - expected cut value (result.first), compared within `precision`
//   limit    - expected index returned in result.second
//   title    - case label printed to stdout before the checks
// Always returns true; mismatches are reported through the EXPECT_* macros.
// NOTE(review): the interval end is hard-coded to 10 and y_ has ten labels, so X_ is
// expected to hold exactly ten samples - confirm if new cases use other sizes.
bool test_result(samples_t& X_, size_t cut, float midPoint, size_t limit, string title)
|
||||
{
|
||||
pair<precision_t, size_t> result;
|
||||
// One distinct label per sample
labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
X = X_;
|
||||
y = y_;
|
||||
// valueCutPoint reads X through `indices`, so rebuild them for the new data
indices = sortIndices(X, y);
|
||||
cout << "* " << title << endl;
|
||||
result = valueCutPoint(0, cut, 10);
|
||||
EXPECT_NEAR(result.first, midPoint, precision);
|
||||
EXPECT_EQ(result.second, limit);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||
{
|
||||
@@ -54,11 +62,6 @@ namespace mdlp {
|
||||
y = labels_t();
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
|
||||
{
|
||||
algorithm = 2;
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||
{
|
||||
X = { 1, 2, 3 };
|
||||
@@ -83,31 +86,41 @@ namespace mdlp {
|
||||
y = { 2, 2, 1 };
|
||||
indices = { 1, 2, 0 };
|
||||
}
|
||||
TEST_F(TestFImdlp, TestArtificialDatasetAlternative)
|
||||
TEST_F(TestFImdlp, TestShortDatasets)
|
||||
{
|
||||
algorithm = 1;
|
||||
vector<precision_t> computed;
|
||||
X = { 1 };
|
||||
y = { 1 };
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
EXPECT_EQ(computed.size(), 0);
|
||||
X = { 1, 3 };
|
||||
y = { 1, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 0);
|
||||
X = { 2, 4 };
|
||||
y = { 1, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 0);
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 1);
|
||||
EXPECT_NEAR(computed[0], 1.5, precision);
|
||||
}
|
||||
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||
{
|
||||
algorithm = 0;
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
cutPoints_t expected = { 5.05 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
@@ -116,44 +129,17 @@ namespace mdlp {
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 6.25 },
|
||||
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
|
||||
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 0;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, TestIrisAlternative)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 5.75 },
|
||||
{ 2.8499999046325684, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75 },
|
||||
{ 2.75, 2.85, 2.95, 3.05, 3.35 },
|
||||
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
|
||||
{ 0.80000001192092896, 1.75 }
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 1;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
@@ -166,21 +152,30 @@ namespace mdlp {
|
||||
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
algorithm = 0;
|
||||
expected = { 1.5 };
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
samples_t X_ = { 0, 1, 2, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
|
||||
TEST_F(TestFImdlp, ValueCutPoint)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
expected = { 1.5 };
|
||||
algorithm = true;
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
// Case titles as stated in the doc
|
||||
samples_t X1a{ 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 };
|
||||
test_result(X1a, 6, 7.3 / 2, 6, "1a");
|
||||
samples_t X2a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
|
||||
test_result(X2a, 6, 7.1 / 2, 4, "2a");
|
||||
samples_t X2b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
|
||||
test_result(X2b, 6, 7.5 / 2, 7, "2b");
|
||||
samples_t X3a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
|
||||
test_result(X3a, 4, 7.1 / 2, 4, "3a");
|
||||
samples_t X3b = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
|
||||
test_result(X3b, 4, 7.1 / 2, 4, "3b");
|
||||
samples_t X4a = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.9, 4.0 };
|
||||
test_result(X4a, 4, 6.9 / 2, 2, "4a");
|
||||
samples_t X4b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
|
||||
test_result(X4b, 4, 7.5 / 2, 7, "4b");
|
||||
samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
|
||||
test_result(X4c, 4, 6.9 / 2, 2, "4c");
|
||||
}
|
||||
}
|
||||
|
332
tests/datasets/glass.arff
Executable file
332
tests/datasets/glass.arff
Executable file
@@ -0,0 +1,332 @@
|
||||
% 1. Title: Glass Identification Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: B. German
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% (b) Donor: Vina Spiehler, Ph.D., DABFT
|
||||
% Diagnostic Products Corporation
|
||||
% (213) 776-0180 (ext 3014)
|
||||
% (c) Date: September, 1987
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% -- Rule Induction in Forensic Science
|
||||
% -- Ian W. Evett and Ernest J. Spiehler
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% -- Unknown technical note number (sorry, not listed here)
|
||||
% -- General Results: nearest neighbor held its own with respect to the
|
||||
% rule-based system
|
||||
%
|
||||
% 4. Relevant Information:n
|
||||
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
|
||||
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
|
||||
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
|
||||
% Suite 206; Marina Del Ray, CA 90292 (213) 827-7890 and FAX: -3189.
|
||||
% In determining whether the glass was a type of "float" glass or not,
|
||||
% the following results were obtained (# incorrect answers):
|
||||
%
|
||||
% Type of Sample Beagle NN DA
|
||||
% Windows that were float processed (87) 10 12 21
|
||||
% Windows that were not: (76) 19 16 22
|
||||
%
|
||||
% The study of classification of types of glass was motivated by
|
||||
% criminological investigation. At the scene of the crime, the glass left
|
||||
% can be used as evidence...if it is correctly identified!
|
||||
%
|
||||
% 5. Number of Instances: 214
|
||||
%
|
||||
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
|
||||
% -- all attributes are continuously valued
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. Id number: 1 to 214
|
||||
% 2. RI: refractive index
|
||||
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
|
||||
% are attributes 4-10)
|
||||
% 4. Mg: Magnesium
|
||||
% 5. Al: Aluminum
|
||||
% 6. Si: Silicon
|
||||
% 7. K: Potassium
|
||||
% 8. Ca: Calcium
|
||||
% 9. Ba: Barium
|
||||
% 10. Fe: Iron
|
||||
% 11. Type of glass: (class attribute)
|
||||
% -- 1 building_windows_float_processed
|
||||
% -- 2 building_windows_non_float_processed
|
||||
% -- 3 vehicle_windows_float_processed
|
||||
% -- 4 vehicle_windows_non_float_processed (none in this database)
|
||||
% -- 5 containers
|
||||
% -- 6 tableware
|
||||
% -- 7 headlamps
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Attribute: Min Max Mean SD Correlation with class
|
||||
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
|
||||
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
|
||||
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
|
||||
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
|
||||
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
|
||||
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
|
||||
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
|
||||
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
|
||||
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
|
||||
%
|
||||
% 9. Class Distribution: (out of 214 total instances)
|
||||
% -- 163 Window glass (building windows and vehicle windows)
|
||||
% -- 87 float processed
|
||||
% -- 70 building windows
|
||||
% -- 17 vehicle windows
|
||||
% -- 76 non-float processed
|
||||
% -- 76 building windows
|
||||
% -- 0 vehicle windows
|
||||
% -- 51 Non-window glass
|
||||
% -- 13 containers
|
||||
% -- 9 tableware
|
||||
% -- 29 headlamps
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
% Relabeled values in attribute 'Type'
|
||||
% From: '1' To: 'build wind float'
|
||||
% From: '2' To: 'build wind non-float'
|
||||
% From: '3' To: 'vehic wind float'
|
||||
% From: '4' To: 'vehic wind non-float'
|
||||
% From: '5' To: containers
|
||||
% From: '6' To: tableware
|
||||
% From: '7' To: headlamps
|
||||
%
|
||||
@relation Glass
|
||||
@attribute 'RI' real
|
||||
@attribute 'Na' real
|
||||
@attribute 'Mg' real
|
||||
@attribute 'Al' real
|
||||
@attribute 'Si' real
|
||||
@attribute 'K' real
|
||||
@attribute 'Ca' real
|
||||
@attribute 'Ba' real
|
||||
@attribute 'Fe' real
|
||||
@attribute 'Type' { 'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
|
||||
@data
|
||||
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
|
||||
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
|
||||
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
|
||||
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
|
||||
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
|
||||
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
|
||||
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
|
||||
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
|
||||
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
|
||||
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
|
||||
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
|
||||
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
|
||||
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
|
||||
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
|
||||
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
|
||||
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
|
||||
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
|
||||
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
|
||||
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
|
||||
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
|
||||
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
|
||||
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
|
||||
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
|
||||
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
|
||||
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
|
||||
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
|
||||
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
|
||||
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
|
||||
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
|
||||
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
|
||||
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
|
||||
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
|
||||
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
|
||||
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
|
||||
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
|
||||
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
|
||||
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
|
||||
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
|
||||
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
|
||||
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
|
||||
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
|
||||
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
|
||||
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
|
||||
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
|
||||
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
|
||||
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
|
||||
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
|
||||
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
|
||||
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
|
||||
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
|
||||
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
|
||||
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
|
||||
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
|
||||
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
|
||||
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
|
||||
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
|
||||
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
|
||||
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
|
||||
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
|
||||
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
|
||||
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
|
||||
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
|
||||
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
|
||||
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
|
||||
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
|
||||
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
|
||||
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
|
||||
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
|
||||
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
|
||||
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
|
||||
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
|
||||
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
|
||||
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
|
||||
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
|
||||
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
|
||||
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
|
||||
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
|
||||
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
|
||||
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
|
||||
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
|
||||
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
|
||||
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
|
||||
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
|
||||
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
|
||||
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
|
||||
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
|
||||
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
|
||||
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
|
||||
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
|
||||
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
|
||||
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
|
||||
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
|
||||
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
|
||||
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
|
||||
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
|
||||
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
|
||||
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
|
||||
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
|
||||
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
|
||||
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
|
||||
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
|
||||
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
|
||||
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
|
||||
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
|
||||
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
|
||||
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
|
||||
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
|
||||
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
|
||||
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
|
||||
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
|
||||
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
|
||||
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
|
||||
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
|
||||
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
|
||||
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
|
||||
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
|
||||
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
|
||||
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
|
||||
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
|
||||
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
|
||||
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
|
||||
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
|
||||
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
|
||||
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
|
||||
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
|
||||
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
|
||||
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
|
||||
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
|
||||
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
|
||||
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
|
||||
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
|
||||
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
|
||||
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
|
||||
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
|
||||
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
|
||||
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
|
||||
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
|
||||
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
|
||||
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
|
||||
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
|
||||
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
|
||||
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
|
||||
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
|
||||
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
|
||||
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
|
||||
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
|
||||
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
|
||||
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
|
||||
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
|
||||
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
|
||||
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
|
||||
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
|
||||
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
|
||||
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
|
||||
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
|
||||
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
|
||||
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
|
||||
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
|
||||
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
|
||||
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
|
||||
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
|
||||
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
|
||||
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
|
||||
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
|
||||
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
|
||||
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
|
||||
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
|
||||
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
|
||||
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
|
||||
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
|
||||
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
|
||||
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
|
||||
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
|
||||
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
|
||||
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
|
||||
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
|
||||
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
|
||||
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
|
||||
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
|
||||
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
|
||||
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
|
||||
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
|
||||
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
|
||||
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
|
||||
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
|
||||
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
|
||||
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
|
||||
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
|
||||
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
|
||||
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
|
||||
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
|
||||
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
|
||||
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
|
||||
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
|
||||
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
|
||||
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
|
||||
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
|
||||
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
|
||||
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
|
||||
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
|
||||
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
|
||||
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
|
||||
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
|
||||
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
|
||||
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
|
||||
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware
|
180
tests/datasets/test.arff
Executable file
180
tests/datasets/test.arff
Executable file
@@ -0,0 +1,180 @@
|
||||
% 1. Title: Test Feature extracted from Glass
|
||||
%
|
||||
|
||||
@RELATION test
|
||||
|
||||
@ATTRIBUTE Mg REAL
|
||||
@ATTRIBUTE Type {0,1,2,3,4,5,6}
|
||||
|
||||
@DATA
|
||||
3.5,0
|
||||
3.52,1
|
||||
1.74,2
|
||||
0.0,3
|
||||
2.85,3
|
||||
3.65,1
|
||||
2.84,0
|
||||
0.0,4
|
||||
3.9,3
|
||||
3.58,3
|
||||
3.25,3
|
||||
3.76,1
|
||||
3.45,1
|
||||
3.48,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
2.96,3
|
||||
3.65,0
|
||||
0.0,3
|
||||
3.74,3
|
||||
3.66,0
|
||||
1.61,5
|
||||
3.49,3
|
||||
3.52,3
|
||||
3.54,3
|
||||
3.53,1
|
||||
3.56,3
|
||||
3.34,1
|
||||
0.0,2
|
||||
3.61,3
|
||||
3.6,0
|
||||
3.46,0
|
||||
2.72,3
|
||||
3.51,3
|
||||
3.09,3
|
||||
3.48,0
|
||||
3.8,3
|
||||
3.58,3
|
||||
3.54,1
|
||||
3.42,0
|
||||
2.68,5
|
||||
3.49,0
|
||||
3.68,3
|
||||
3.6,0
|
||||
3.59,3
|
||||
0.0,4
|
||||
3.54,0
|
||||
2.2,4
|
||||
3.59,0
|
||||
3.66,1
|
||||
3.87,3
|
||||
3.45,0
|
||||
3.82,0
|
||||
3.72,0
|
||||
3.33,0
|
||||
3.78,1
|
||||
2.24,2
|
||||
3.83,3
|
||||
0.78,2
|
||||
3.9,3
|
||||
3.58,0
|
||||
3.57,3
|
||||
3.52,0
|
||||
3.47,0
|
||||
3.48,0
|
||||
3.66,0
|
||||
3.62,0
|
||||
3.39,0
|
||||
0.0,3
|
||||
3.55,0
|
||||
1.01,3
|
||||
0.0,4
|
||||
3.43,0
|
||||
3.58,0
|
||||
0.0,4
|
||||
3.74,0
|
||||
0.0,4
|
||||
3.44,3
|
||||
3.97,3
|
||||
3.6,0
|
||||
3.64,3
|
||||
1.83,4
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
3.73,0
|
||||
3.58,1
|
||||
3.34,4
|
||||
2.09,3
|
||||
2.71,0
|
||||
3.18,3
|
||||
3.43,0
|
||||
3.15,3
|
||||
3.56,0
|
||||
0.0,4
|
||||
0.0,4
|
||||
4.49,0
|
||||
3.59,0
|
||||
3.56,3
|
||||
3.52,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
3.61,0
|
||||
3.74,0
|
||||
3.62,3
|
||||
3.84,0
|
||||
3.67,0
|
||||
3.58,0
|
||||
0.0,5
|
||||
3.66,3
|
||||
3.68,3
|
||||
2.28,3
|
||||
3.67,3
|
||||
3.2,4
|
||||
3.81,0
|
||||
0.0,3
|
||||
3.39,1
|
||||
3.57,0
|
||||
1.85,5
|
||||
3.75,0
|
||||
3.76,3
|
||||
0.0,3
|
||||
3.86,0
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,3
|
||||
3.5,0
|
||||
3.67,3
|
||||
3.55,3
|
||||
0.0,4
|
||||
3.61,3
|
||||
3.41,3
|
||||
3.7,0
|
||||
0.0,4
|
||||
3.58,0
|
||||
1.78,4
|
||||
3.85,0
|
||||
3.48,0
|
||||
1.71,5
|
||||
0.0,2
|
||||
3.5,3
|
||||
3.49,3
|
||||
3.36,1
|
||||
0.0,3
|
||||
3.54,0
|
||||
0.0,4
|
||||
2.87,0
|
||||
3.57,1
|
||||
3.48,0
|
||||
2.81,0
|
||||
0.0,4
|
||||
0.0,2
|
||||
3.62,3
|
||||
3.58,3
|
||||
3.45,3
|
||||
3.48,3
|
||||
3.54,3
|
||||
3.54,3
|
||||
3.37,0
|
||||
3.41,1
|
||||
3.48,0
|
||||
3.43,3
|
||||
3.5,0
|
||||
0.0,4
|
||||
3.54,0
|
||||
3.52,3
|
||||
3.52,3
|
||||
2.88,3
|
||||
2.19,2
|
@@ -11,7 +11,7 @@ namespace mdlp {
|
||||
typedef vector<int> labels_t;
|
||||
typedef vector<size_t> indices_t;
|
||||
typedef vector<precision_t> cutPoints_t;
|
||||
typedef map<tuple<int, int>, precision_t> cacheEnt_t;
|
||||
typedef map<pair<int, int>, precision_t> cacheEnt_t;
|
||||
typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user