mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-17 16:35:57 +00:00
Compare commits
61 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
0b35a15d62 | ||
c662a96da8
|
|||
0ead15be7c
|
|||
da41a9317d
|
|||
42e83b3d26
|
|||
77135739cf
|
|||
27ea3bf338
|
|||
12222f7903
|
|||
cfade7a556
|
|||
f0845c5bd1
|
|||
1f4abade2c
|
|||
770502c8e5
|
|||
ed7433672d
|
|||
14860ea0b9
|
|||
d9a6f528f6
|
|||
7551b0d669
|
|||
ffb8df4d1c
|
|||
ed784736ca
|
|||
49e9dd3e12
|
|||
083a56b311
|
|||
4492252729
|
|||
c00b7a613c
|
|||
200015000c
|
|||
ce9ddb3be3
|
|||
90428218c2
|
|||
0b63d9ace0
|
|||
6875127394
|
|||
747f610ce9
|
|||
a7d13f602d
|
|||
552b03afc9
|
|||
4a9664c4aa
|
|||
964555de20
|
|||
d6cece1006
|
|||
|
e25ca378f0 | ||
71c1dc2928
|
|||
ebea31afd1
|
|||
89d675eb1f
|
|||
e8fcc20a32
|
|||
848ee7ba24
|
|||
|
32a6fd9ba0 | ||
cd04f97fd0
|
|||
458a313aee
|
|||
e97aea2a4d
|
|||
4707bc0b7f
|
|||
8c868981e8
|
|||
e812e91540
|
|||
dddeea4024
|
|||
5b7d66d922
|
|||
|
5d5eb98afc | ||
a44f01460a
|
|||
de25ba78bd
|
|||
79c029832a
|
|||
5bb0e1e6ca
|
|||
dec1295933
|
|||
04c1772019
|
|||
e37702dcb0
|
|||
1c7492d3b6
|
|||
2ab828b400
|
|||
|
1b89f5927c
|
||
7b20bde428
|
|||
13ebb43bf3
|
41
.github/workflows/build.yml
vendored
Normal file
41
.github/workflows/build.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
name: Build
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- "*"
|
||||
pull_request:
|
||||
types: [ opened, synchronize, reopened ]
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||
steps:
|
||||
- uses: actions/checkout@v3.2.0
|
||||
with:
|
||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||
- name: Install sonar-scanner and build-wrapper
|
||||
uses: SonarSource/sonarcloud-github-c-cpp@v1
|
||||
- name: Install lcov & gcovr
|
||||
run: |
|
||||
sudo apt-get -y install lcov
|
||||
sudo apt-get -y install gcovr
|
||||
- name: Tests & build-wrapper
|
||||
run: |
|
||||
cmake -S . -B build -Wno-dev
|
||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
|
||||
cd build
|
||||
make
|
||||
ctest -C Release --output-on-failure --test-dir tests
|
||||
cd ..
|
||||
# gcovr -f CPPFImdlp.cpp -f Metrics.cpp --merge-mode-functions=separate --txt --sonarqube=coverage.xml
|
||||
gcovr -f CPPFImdlp.cpp -f Metrics.cpp --txt --sonarqube=coverage.xml
|
||||
- name: Run sonar-scanner
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
run: |
|
||||
sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
||||
--define sonar.coverageReportPaths=coverage.xml
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -35,3 +35,5 @@
|
||||
.idea
|
||||
cmake-*
|
||||
**/CMakeFiles
|
||||
.vscode/*
|
||||
**/gcovr-report
|
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "lldb samplex",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"targetArchitecture": "arm64",
|
||||
"program": "${workspaceRoot}/sample/build/sample",
|
||||
"args": [
|
||||
"-f",
|
||||
"glass"
|
||||
],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceRoot}/sample/build/",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "lldb"
|
||||
}
|
||||
]
|
||||
}
|
9
.vscode/settings.json
vendored
Normal file
9
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"sonarlint.connectedMode.project": {
|
||||
"connectionId": "rmontanana",
|
||||
"projectKey": "rmontanana_mdlp"
|
||||
},
|
||||
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
||||
"cmake.configureOnOpen": true,
|
||||
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
|
||||
}
|
@@ -1,7 +1,13 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(mdlp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif ()
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp sample/sample.cpp)
|
||||
add_subdirectory(sample)
|
||||
add_subdirectory(tests)
|
||||
|
||||
|
224
CPPFImdlp.cpp
224
CPPFImdlp.cpp
@@ -4,108 +4,132 @@
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
||||
{
|
||||
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
||||
max_depth(max_depth_),
|
||||
proposed_cuts(proposed) {
|
||||
}
|
||||
CPPFImdlp::~CPPFImdlp()
|
||||
= default;
|
||||
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
|
||||
CPPFImdlp::CPPFImdlp() = default;
|
||||
|
||||
CPPFImdlp::~CPPFImdlp() = default;
|
||||
|
||||
size_t CPPFImdlp::compute_max_num_cut_points() const {
|
||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||
if (proposed_cuts == 0) {
|
||||
return numeric_limits<size_t>::max();
|
||||
}
|
||||
if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
|
||||
throw invalid_argument("wrong proposed num_cuts value");
|
||||
}
|
||||
if (proposed_cuts < 1)
|
||||
return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
|
||||
return static_cast<size_t>(proposed_cuts);
|
||||
}
|
||||
|
||||
void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
|
||||
X = X_;
|
||||
y = y_;
|
||||
num_cut_points = compute_max_num_cut_points();
|
||||
depth = 0;
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
}
|
||||
if (X.size() == 0 || y.size() == 0) {
|
||||
if (X.empty() || y.empty()) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
if (min_length < 3) {
|
||||
throw invalid_argument("min_length must be greater than 2");
|
||||
}
|
||||
if (max_depth < 1) {
|
||||
throw invalid_argument("max_depth must be greater than 0");
|
||||
}
|
||||
indices = sortIndices(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
switch (algorithm) {
|
||||
case 0:
|
||||
computeCutPoints(0, X.size());
|
||||
break;
|
||||
case 1:
|
||||
computeCutPointsAlternative(0, X.size());
|
||||
break;
|
||||
default:
|
||||
throw invalid_argument("algorithm must be 0 or 1");
|
||||
}
|
||||
return *this;
|
||||
computeCutPoints(0, X.size(), 1);
|
||||
}
|
||||
precision_t CPPFImdlp::halfWayValueCutPoint(size_t start, size_t idx)
|
||||
{
|
||||
size_t idxPrev = idx - 1;
|
||||
precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
}
|
||||
return (previous + actual) / 2;
|
||||
}
|
||||
tuple<precision_t, size_t> CPPFImdlp::completeValueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t idxPrev = cut - 1;
|
||||
precision_t previous, next, actual;
|
||||
|
||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
|
||||
size_t n;
|
||||
size_t m;
|
||||
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
||||
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
||||
bool backWall; // true if duplicates reach beginning of the interval
|
||||
precision_t previous;
|
||||
precision_t actual;
|
||||
precision_t next;
|
||||
previous = X[indices[idxPrev]];
|
||||
next = actual = X[indices[cut]];
|
||||
actual = X[indices[cut]];
|
||||
next = X[indices[idxNext]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
// get the first equal value of X in the interval
|
||||
while (idxPrev > start && actual == previous) {
|
||||
previous = X[indices[--idxPrev]];
|
||||
}
|
||||
backWall = idxPrev == start && actual == previous;
|
||||
// get the last equal value of X in the interval
|
||||
while (actual == X[indices[cut++]] && cut < end);
|
||||
if (previous == actual && cut < end)
|
||||
actual = X[indices[cut]];
|
||||
cut--;
|
||||
return make_tuple((previous + actual) / 2, cut);
|
||||
while (idxNext < end - 1 && actual == next) {
|
||||
next = X[indices[++idxNext]];
|
||||
}
|
||||
// # of duplicates before cutpoint
|
||||
n = cut - 1 - idxPrev;
|
||||
// # of duplicates after cutpoint
|
||||
m = idxNext - cut - 1;
|
||||
// Decide which values to use
|
||||
cut = cut + (backWall ? m + 1 : -n);
|
||||
actual = X[indices[cut]];
|
||||
return {(actual + previous) / 2, cut};
|
||||
}
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||
{
|
||||
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
|
||||
size_t cut;
|
||||
tuple<precision_t, size_t> result;
|
||||
if (end - start < 2)
|
||||
pair<precision_t, size_t> result;
|
||||
if (cutPoints.size() == num_cut_points)
|
||||
return;
|
||||
// Check if the interval length and the depth are Ok
|
||||
if (end - start < min_length || depth_ > max_depth)
|
||||
return;
|
||||
depth = depth_ > depth ? depth_ : depth;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = completeValueCutPoint(start, cut, end);
|
||||
cut = get<1>(result);
|
||||
cutPoints.push_back(get<0>(result));
|
||||
computeCutPoints(start, cut);
|
||||
computeCutPoints(cut, end);
|
||||
result = valueCutPoint(start, cut, end);
|
||||
cut = result.second;
|
||||
cutPoints.push_back(result.first);
|
||||
computeCutPoints(start, cut, depth_ + 1);
|
||||
computeCutPoints(cut, end, depth_ + 1);
|
||||
}
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||
{
|
||||
size_t cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
cutPoints.push_back(halfWayValueCutPoint(start, cut));
|
||||
computeCutPointsAlternative(start, cut);
|
||||
computeCutPointsAlternative(cut, end);
|
||||
}
|
||||
}
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amogst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||
precision_t entropy_left, entropy_right, minEntropy;
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max();
|
||||
size_t elements = end - start;
|
||||
bool sameValues = true;
|
||||
precision_t entropy_left;
|
||||
precision_t entropy_right;
|
||||
precision_t minEntropy;
|
||||
// Check if all the values of the variable in the interval are the same
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
if (X[indices[idx]] != X[indices[start]]) {
|
||||
sameValues = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sameValues)
|
||||
return candidate;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (auto idx = start + 1; idx < end; idx++) {
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
|
||||
entropy_left = precision_t(idx - start) / static_cast<float>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<float>(elements) * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
@@ -113,15 +137,17 @@ namespace mdlp {
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k, k1, k2;
|
||||
precision_t ig, delta;
|
||||
precision_t ent, ent1, ent2;
|
||||
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
|
||||
int k;
|
||||
int k1;
|
||||
int k2;
|
||||
precision_t ig;
|
||||
precision_t delta;
|
||||
precision_t ent;
|
||||
precision_t ent1;
|
||||
precision_t ent2;
|
||||
auto N = precision_t(end - start);
|
||||
if (N < 2) {
|
||||
return false;
|
||||
}
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
@@ -129,35 +155,31 @@ namespace mdlp {
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
||||
delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) {
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
for (size_t i = 0; i < X_.size(); i++)
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
|
||||
{
|
||||
if (X_[i1] == X_[i2]) return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||
if (X_[i1] == X_[i2])
|
||||
return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
cutPoints_t CPPFImdlp::getCutPoints()
|
||||
{
|
||||
// Remove duplicates and sort
|
||||
cutPoints_t output(cutPoints.size());
|
||||
set<precision_t> s;
|
||||
unsigned size = cutPoints.size();
|
||||
for (unsigned i = 0; i < size; i++)
|
||||
s.insert(cutPoints[i]);
|
||||
output.assign(s.begin(), s.end());
|
||||
sort(output.begin(), output.end());
|
||||
return output;
|
||||
|
||||
cutPoints_t CPPFImdlp::getCutPoints() {
|
||||
sort(cutPoints.begin(), cutPoints.end());
|
||||
return cutPoints;
|
||||
}
|
||||
|
||||
int CPPFImdlp::get_depth() const {
|
||||
return depth;
|
||||
}
|
||||
}
|
||||
|
49
CPPFImdlp.h
49
CPPFImdlp.h
@@ -1,33 +1,52 @@
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
int algorithm;
|
||||
indices_t indices;
|
||||
samples_t X;
|
||||
labels_t y;
|
||||
Metrics metrics;
|
||||
size_t min_length = 3;
|
||||
int depth = 0;
|
||||
int max_depth = numeric_limits<int>::max();
|
||||
float proposed_cuts = 0;
|
||||
indices_t indices = indices_t();
|
||||
samples_t X = samples_t();
|
||||
labels_t y = labels_t();
|
||||
Metrics metrics = Metrics(y, indices);
|
||||
cutPoints_t cutPoints;
|
||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
void computeCutPoints(size_t, size_t);
|
||||
void computeCutPointsAlternative(size_t, size_t);
|
||||
|
||||
void computeCutPoints(size_t, size_t, int);
|
||||
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
|
||||
size_t getCandidate(size_t, size_t);
|
||||
precision_t halfWayValueCutPoint(size_t, size_t);
|
||||
tuple<precision_t, size_t> completeValueCutPoint(size_t, size_t, size_t);
|
||||
|
||||
size_t compute_max_num_cut_points() const;
|
||||
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
|
||||
public:
|
||||
CPPFImdlp(int algorithm = 0);
|
||||
CPPFImdlp();
|
||||
|
||||
CPPFImdlp(size_t, int, float);
|
||||
|
||||
~CPPFImdlp();
|
||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||
samples_t getCutPoints();
|
||||
inline string version() { return "1.0.0"; };
|
||||
|
||||
void fit(samples_t&, labels_t&);
|
||||
|
||||
cutPoints_t getCutPoints();
|
||||
|
||||
int get_depth() const;
|
||||
|
||||
static inline string version() { return "1.1.1"; };
|
||||
};
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
50
Metrics.cpp
50
Metrics.cpp
@@ -1,63 +1,71 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
|
||||
{
|
||||
Metrics::Metrics(labels_t &y_, indices_t &indices_) : y(y_), indices(indices_),
|
||||
numClasses(computeNumClasses(0, indices.size())) {
|
||||
}
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
|
||||
int Metrics::computeNumClasses(size_t start, size_t end) {
|
||||
set<int> nClasses;
|
||||
for (auto i = start; i < end; ++i) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return nClasses.size();
|
||||
return static_cast<int>(nClasses.size());
|
||||
}
|
||||
void Metrics::setData(labels_t& y_, indices_t& indices_)
|
||||
{
|
||||
|
||||
void Metrics::setData(const labels_t &y_, const indices_t &indices_) {
|
||||
indices = indices_;
|
||||
y = y_;
|
||||
numClasses = computeNumClasses(0, indices.size());
|
||||
entropyCache.clear();
|
||||
igCache.clear();
|
||||
}
|
||||
precision_t Metrics::entropy(size_t start, size_t end)
|
||||
{
|
||||
precision_t p, ventropy = 0;
|
||||
|
||||
precision_t Metrics::entropy(size_t start, size_t end) {
|
||||
precision_t p;
|
||||
precision_t ventropy = 0;
|
||||
int nElements = 0;
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
if (end - start < 2)
|
||||
return 0;
|
||||
if (entropyCache.find(make_tuple(start, end)) != entropyCache.end()) {
|
||||
return entropyCache[make_tuple(start, end)];
|
||||
if (entropyCache.find({start, end}) != entropyCache.end()) {
|
||||
return entropyCache[{start, end}];
|
||||
}
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
nElements++;
|
||||
}
|
||||
for (auto count : counts) {
|
||||
for (auto count: counts) {
|
||||
if (count > 0) {
|
||||
p = (precision_t)count / nElements;
|
||||
p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
|
||||
ventropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
entropyCache[make_tuple(start, end)] = ventropy;
|
||||
entropyCache[{start, end}] = ventropy;
|
||||
return ventropy;
|
||||
}
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end) {
|
||||
precision_t iGain;
|
||||
precision_t entropyInterval, entropyLeft, entropyRight;
|
||||
int nElementsLeft = cut - start, nElementsRight = end - cut;
|
||||
int nElements = end - start;
|
||||
precision_t entropyInterval;
|
||||
precision_t entropyLeft;
|
||||
precision_t entropyRight;
|
||||
size_t nElementsLeft = cut - start;
|
||||
size_t nElementsRight = end - cut;
|
||||
size_t nElements = end - start;
|
||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||
return igCache[make_tuple(start, cut, end)];
|
||||
}
|
||||
entropyInterval = entropy(start, end);
|
||||
entropyLeft = entropy(start, cut);
|
||||
entropyRight = entropy(cut, end);
|
||||
iGain = entropyInterval - ((precision_t)nElementsLeft * entropyLeft + (precision_t)nElementsRight * entropyRight) / nElements;
|
||||
iGain = entropyInterval -
|
||||
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
||||
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
||||
static_cast<precision_t>(nElements);
|
||||
igCache[make_tuple(start, cut, end)] = iGain;
|
||||
return iGain;
|
||||
}
|
||||
|
18
Metrics.h
18
Metrics.h
@@ -1,19 +1,25 @@
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
labels_t &y;
|
||||
indices_t &indices;
|
||||
int numClasses;
|
||||
cacheEnt_t entropyCache;
|
||||
cacheIg_t igCache;
|
||||
cacheEnt_t entropyCache = cacheEnt_t();
|
||||
cacheIg_t igCache = cacheIg_t();
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(labels_t&, indices_t&);
|
||||
Metrics(labels_t &, indices_t &);
|
||||
|
||||
void setData(const labels_t &, const indices_t &);
|
||||
|
||||
int computeNumClasses(size_t, size_t);
|
||||
|
||||
precision_t entropy(size_t, size_t);
|
||||
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
|
12
README.md
12
README.md
@@ -1,3 +1,7 @@
|
||||
[](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
|
||||
# mdlp
|
||||
|
||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||
@@ -7,6 +11,11 @@ The implementation tries to mitigate the problem of different label values with
|
||||
- Sorts the values of the variable using the label values as a tie-breaker
|
||||
- Once a valid candidate for the split is found, it checks whether the previous value is the same as the current one and, if so, tries to use the preceding value, or the next one if that is not possible.
|
||||
|
||||
Other features:
|
||||
|
||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||
- Intervals must contain more than two examples to be evaluated.
|
||||
|
||||
The algorithm returns the cut points for the variable.
|
||||
|
||||
## Sample
|
||||
@@ -19,7 +28,8 @@ mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
./sample iris
|
||||
./sample -f iris -m 2
|
||||
./sample -h
|
||||
```
|
||||
|
||||
## Test
|
||||
|
21
sample/.vscode/launch.json
vendored
Normal file
21
sample/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "lldb puro",
|
||||
"type": "cppdbg",
|
||||
// "targetArchitecture": "arm64",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/build/sample",
|
||||
"args": [
|
||||
"-f",
|
||||
"iris"
|
||||
],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceRoot}/build/",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "lldb"
|
||||
},
|
||||
]
|
||||
}
|
@@ -1,6 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(main)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||
|
@@ -1,55 +1,187 @@
|
||||
#include <algorithm>
#include <chrono>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <vector>
#include <getopt.h>
#include "../CPPFImdlp.h"
#include "../tests/ArffFiles.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mdlp;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "../../tests/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
return 1;
|
||||
}
|
||||
const string PATH = "../../tests/datasets/";
|
||||
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
/* print a description of all supported options */
|
||||
void usage(const char *path) {
|
||||
/* take only the last portion of the path */
|
||||
const char *basename = strrchr(path, '/');
|
||||
basename = basename ? basename + 1 : path;
|
||||
|
||||
cout << "usage: " << basename << "[OPTION]" << endl;
|
||||
cout << " -h, --help\t\t Print this help and exit." << endl;
|
||||
cout
|
||||
<< " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
||||
<< endl;
|
||||
cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
|
||||
cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
|
||||
cout
|
||||
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any"
|
||||
<< endl;
|
||||
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
|
||||
}
|
||||
|
||||
/**
 * Parse the command line options of the sample program.
 *
 * Prints the usage text and terminates the process when help is requested
 * (exit code 0), when an unknown option or a malformed numeric value is
 * found, or when no file name was supplied (exit code 1).
 *
 * @param argc argument count as received by main
 * @param argv argument vector as received by main
 * @return tuple of (file_name, path, max_depth, min_length, max_cutpoints)
 */
tuple<string, string, int, int, float> parse_arguments(int argc, char **argv) {
    string file_name;
    string path = PATH;
    int max_depth = numeric_limits<int>::max();
    int min_length = 3;
    float max_cutpoints = 0;
    const option long_options[] = {
            {"help",          no_argument,       nullptr, 'h'},
            {"file",          required_argument, nullptr, 'f'},
            {"path",          required_argument, nullptr, 'p'},
            {"max_depth",     required_argument, nullptr, 'm'},
            {"max_cutpoints", required_argument, nullptr, 'c'},
            {"min_length",    required_argument, nullptr, 'n'},
            {nullptr,         no_argument,       nullptr, 0}
    };
    while (true) {
        const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, nullptr);
        if (c == -1)
            break;
        try {
            switch (c) {
                case 'h':
                    usage(argv[0]);
                    exit(0);
                case 'f':
                    file_name = string(optarg);
                    break;
                case 'm':
                    max_depth = stoi(optarg);
                    break;
                case 'n':
                    min_length = stoi(optarg);
                    break;
                case 'c':
                    max_cutpoints = stof(optarg);
                    break;
                case 'p':
                    path = optarg;
                    // An empty path is left untouched; calling back() on it would be undefined
                    if (!path.empty() && path.back() != '/')
                        path += '/';
                    break;
                case '?':
                    usage(argv[0]);
                    exit(1);
                default:
                    abort();
            }
        } catch (const logic_error &) {
            // stoi/stof throw invalid_argument or out_of_range (both logic_error)
            // on malformed numbers; report it instead of terminating abnormally.
            cout << "Invalid value for option -" << static_cast<char>(c) << ": " << optarg << endl;
            usage(argv[0]);
            exit(1);
        }
    }
    if (file_name.empty()) {
        usage(argv[0]);
        exit(1);
    }
    return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
}
|
||||
|
||||
void process_file(const string &path, const string &file_name, bool class_last, int max_depth, int min_length,
|
||||
float max_cutpoints) {
|
||||
ArffFiles file;
|
||||
|
||||
file.load(path + file_name + ".arff", class_last);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
auto items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
for (auto attribute: attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
vector<samples_t> &X = file.getX();
|
||||
labels_t &y = file.getY();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (auto feature: X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||
auto total = 0;
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
for (auto item: test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
total += test.getCutPoints().size();
|
||||
}
|
||||
cout << "Total cut points ...: " << total << endl;
|
||||
cout << "Total feature states: " << total + attributes.size() << endl;
|
||||
}
|
||||
|
||||
void process_all_files(const map<string, bool> &datasets, const string &path, int max_depth, int min_length,
|
||||
float max_cutpoints) {
|
||||
cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
||||
<< max_cutpoints << endl << endl;
|
||||
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
||||
printf("==================== ==== ==== ========\n");
|
||||
for (const auto &dataset: datasets) {
|
||||
ArffFiles file;
|
||||
file.load(path + dataset.first + ".arff", dataset.second);
|
||||
auto attributes = file.getAttributes();
|
||||
vector<samples_t> &X = file.getX();
|
||||
labels_t &y = file.getY();
|
||||
size_t timing = 0;
|
||||
int cut_points = 0;
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
||||
test.fit(X[i], y);
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
|
||||
cut_points += test.getCutPoints().size();
|
||||
}
|
||||
printf("%-20s %4lu %4d %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
map<string, bool> datasets = {
|
||||
{"glass", true},
|
||||
{"iris", true},
|
||||
{"kdd_JapaneseVowels", false},
|
||||
{"letter", true},
|
||||
{"liver-disorders", true},
|
||||
{"mfeat-factors", true},
|
||||
{"test", true}
|
||||
};
|
||||
string file_name;
|
||||
string path;
|
||||
int max_depth;
|
||||
int min_length;
|
||||
float max_cutpoints;
|
||||
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
||||
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
||||
cout << "Invalid file name: " << file_name << endl;
|
||||
usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
if (file_name == "all")
|
||||
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
||||
else {
|
||||
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
||||
cout << "File name ....: " << file_name << endl;
|
||||
cout << "Max depth ....: " << max_depth << endl;
|
||||
cout << "Min length ...: " << min_length << endl;
|
||||
cout << "Max cutpoints : " << max_cutpoints << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
12
sonar-project.properties
Normal file
12
sonar-project.properties
Normal file
@@ -0,0 +1,12 @@
|
||||
sonar.projectKey=rmontanana_mdlp
|
||||
sonar.organization=rmontanana
|
||||
|
||||
# This is the name and version displayed in the SonarCloud UI.
|
||||
#sonar.projectName=mdlp
|
||||
#sonar.projectVersion=1.0
|
||||
|
||||
# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
|
||||
#sonar.sources=.
|
||||
|
||||
# Encoding of the source code. Defaults to the system encoding.
|
||||
sonar.sourceEncoding=UTF-8
|
@@ -2,87 +2,92 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
ArffFiles::ArffFiles()
|
||||
{
|
||||
}
|
||||
vector<string> ArffFiles::getLines()
|
||||
{
|
||||
// Default constructor: compiler-generated, no custom initialisation is performed here.
ArffFiles::ArffFiles() = default;
|
||||
|
||||
// Returns a copy of the raw data lines kept by load(), i.e. the lines that
// are neither blank, comments ('%') nor '@' directives.
vector<string> ArffFiles::getLines() const
{
    return lines;
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
|
||||
unsigned long int ArffFiles::getSize() const {
|
||||
return lines.size();
|
||||
}
|
||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||
{
|
||||
|
||||
// Returns a copy of the (name, type) pairs of the attributes.
// After load() the class attribute is no longer part of this list.
vector<pair<string, string>> ArffFiles::getAttributes() const
{
    return attributes;
}
|
||||
string ArffFiles::getClassName()
|
||||
{
|
||||
|
||||
// Returns the name of the attribute that load() selected as the class.
string ArffFiles::getClassName() const
{
    return className;
}
|
||||
string ArffFiles::getClassType()
|
||||
{
|
||||
|
||||
// Returns the declared type text of the attribute that load() selected as the class.
string ArffFiles::getClassType() const
{
    return classType;
}
|
||||
vector<vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
|
||||
// Gives mutable access to the feature matrix: one inner vector per attribute,
// each holding one value per data line (as sized in generateDataset()).
vector<vector<float>> &ArffFiles::getX()
{
    return X;
}
|
||||
vector<int>& ArffFiles::getY()
|
||||
{
|
||||
|
||||
// Gives mutable access to the class labels, stored as the integer codes
// produced by factorize().
vector<int> &ArffFiles::getY()
{
    return y;
}
|
||||
void ArffFiles::load(string fileName, bool classLast)
|
||||
{
|
||||
|
||||
void ArffFiles::load(const string &fileName, bool classLast) {
|
||||
ifstream file(fileName);
|
||||
string keyword, attribute, type;
|
||||
if (file.is_open()) {
|
||||
string line;
|
||||
while (getline(file, line)) {
|
||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
stringstream ss(line);
|
||||
ss >> keyword >> attribute >> type;
|
||||
attributes.push_back(make_tuple(attribute, type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw invalid_argument("No attributes found");
|
||||
if (classLast) {
|
||||
className = get<0>(attributes.back());
|
||||
classType = get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
} else {
|
||||
className = get<0>(attributes.front());
|
||||
classType = get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
}
|
||||
generateDataset(classLast);
|
||||
} else
|
||||
if (!file.is_open()) {
|
||||
throw invalid_argument("Unable to open file");
|
||||
}
|
||||
string line;
|
||||
string keyword;
|
||||
string attribute;
|
||||
string type;
|
||||
string type_w;
|
||||
while (getline(file, line)) {
|
||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
stringstream ss(line);
|
||||
ss >> keyword >> attribute;
|
||||
type = "";
|
||||
while (ss >> type_w)
|
||||
type += type_w + " ";
|
||||
attributes.emplace_back(attribute, trim(type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw invalid_argument("No attributes found");
|
||||
if (classLast) {
|
||||
className = get<0>(attributes.back());
|
||||
classType = get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
} else {
|
||||
className = get<0>(attributes.front());
|
||||
classType = get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
}
|
||||
generateDataset(classLast);
|
||||
|
||||
}
|
||||
void ArffFiles::generateDataset(bool classLast)
|
||||
{
|
||||
|
||||
void ArffFiles::generateDataset(bool classLast) {
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
auto yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
int pos = 0;
|
||||
int xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
@@ -93,20 +98,20 @@ void ArffFiles::generateDataset(bool classLast)
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
string ArffFiles::trim(const string& source)
|
||||
{
|
||||
|
||||
// Returns a copy of `source` without leading and trailing blanks
// (space, newline, carriage return and tab). A string made only of
// blanks, or an empty one, yields an empty string.
string ArffFiles::trim(const string &source) {
    const char *blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
|
||||
{
|
||||
|
||||
vector<int> ArffFiles::factorize(const vector<string> &labels_t) {
|
||||
vector<int> yy;
|
||||
yy.reserve(labels_t.size());
|
||||
map<string, int> labelMap;
|
||||
int i = 0;
|
||||
for (string label : labels_t) {
|
||||
for (const string &label: labels_t) {
|
||||
if (labelMap.find(label) == labelMap.end()) {
|
||||
labelMap[label] = i++;
|
||||
}
|
||||
|
@@ -1,28 +1,44 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ArffFiles {
|
||||
private:
|
||||
vector<string> lines;
|
||||
vector<tuple<string, string>> attributes;
|
||||
string className, classType;
|
||||
vector<pair<string, string>> attributes;
|
||||
string className;
|
||||
string classType;
|
||||
vector<vector<float>> X;
|
||||
vector<int> y;
|
||||
|
||||
void generateDataset(bool);
|
||||
|
||||
public:
|
||||
ArffFiles();
|
||||
void load(string, bool = true);
|
||||
vector<string> getLines();
|
||||
unsigned long int getSize();
|
||||
string getClassName();
|
||||
string getClassType();
|
||||
string trim(const string&);
|
||||
vector<vector<float>>& getX();
|
||||
vector<int>& getY();
|
||||
vector<tuple<string, string>> getAttributes();
|
||||
vector<int> factorize(const vector<string>& labels_t);
|
||||
|
||||
void load(const string &, bool = true);
|
||||
|
||||
vector<string> getLines() const;
|
||||
|
||||
unsigned long int getSize() const;
|
||||
|
||||
string getClassName() const;
|
||||
|
||||
string getClassType() const;
|
||||
|
||||
static string trim(const string &);
|
||||
|
||||
vector<vector<float>> &getX();
|
||||
|
||||
vector<int> &getY();
|
||||
|
||||
vector<pair<string, string>> getAttributes() const;
|
||||
|
||||
static vector<int> factorize(const vector<string> &labels_t);
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,15 +1,12 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
project(FImdlp)
|
||||
|
||||
# GoogleTest requires at least C++14
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
include(FetchContent)
|
||||
|
||||
include_directories(${GTEST_INCLUDE_DIRS})
|
||||
|
||||
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||
)
|
||||
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
@@ -18,7 +15,7 @@ FetchContent_MakeAvailable(googletest)
|
||||
enable_testing()
|
||||
|
||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||
|
@@ -1,186 +1,298 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
#include "../CPPFImdlp.h"
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include "ArffFiles.h"
|
||||
|
||||
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
|
||||
try { \
|
||||
stmt; \
|
||||
} catch (const etype& ex) { \
|
||||
EXPECT_EQ(whatstring, std::string(ex.what())); \
|
||||
throw; \
|
||||
} \
|
||||
, etype)
|
||||
|
||||
namespace mdlp {
|
||||
class TestFImdlp: public CPPFImdlp, public testing::Test {
|
||||
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
precision_t precision = 0.000001;
|
||||
TestFImdlp(): CPPFImdlp() {}
|
||||
void SetUp()
|
||||
{
|
||||
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
algorithm = false;
|
||||
precision_t precision = 0.000001f;
|
||||
|
||||
TestFImdlp() : CPPFImdlp() {}
|
||||
|
||||
string data_path;
|
||||
|
||||
void SetUp() override {
|
||||
X = {4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
|
||||
6.0f, 5.1f, 5.9f};
|
||||
y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
||||
fit(X, y);
|
||||
data_path = set_data_path();
|
||||
}
|
||||
void setalgorithm(bool value)
|
||||
{
|
||||
algorithm = value;
|
||||
|
||||
static string set_data_path() {
|
||||
string path = "../datasets/";
|
||||
ifstream file(path + "iris.arff");
|
||||
if (file.is_open()) {
|
||||
file.close();
|
||||
return path;
|
||||
}
|
||||
return "../../tests/datasets/";
|
||||
}
|
||||
void checkSortedVector()
|
||||
{
|
||||
|
||||
void checkSortedVector() {
|
||||
indices_t testSortedIndices = sortIndices(X, y);
|
||||
precision_t prev = X[testSortedIndices[0]];
|
||||
for (auto i = 0; i < X.size(); ++i) {
|
||||
for (unsigned long i = 0; i < X.size(); ++i) {
|
||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
prev = X[testSortedIndices[i]];
|
||||
}
|
||||
}
|
||||
void checkCutPoints(cutPoints_t& expected)
|
||||
{
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
for (auto i = 0; i < cutPoints.size(); i++) {
|
||||
EXPECT_NEAR(cutPoints[i], expected[i], precision);
|
||||
|
||||
void checkCutPoints(cutPoints_t &computed, cutPoints_t &expected) const {
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
cout << "(" << computed[i] << ", " << expected[i] << ") ";
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
template<typename T, typename A>
|
||||
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||
{
|
||||
EXPECT_EQ(expected.size(), computed.size());
|
||||
ASSERT_EQ(expected.size(), computed.size());
|
||||
for (auto i = 0; i < expected.size(); i++) {
|
||||
EXPECT_NEAR(expected[i], computed[i], precision);
|
||||
|
||||
bool test_result(const samples_t &X_, size_t cut, float midPoint, size_t limit, const string &title) {
|
||||
pair<precision_t, size_t> result;
|
||||
labels_t y_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
||||
X = X_;
|
||||
y = y_;
|
||||
indices = sortIndices(X, y);
|
||||
cout << "* " << title << endl;
|
||||
result = valueCutPoint(0, cut, 10);
|
||||
EXPECT_NEAR(result.first, midPoint, precision);
|
||||
EXPECT_EQ(result.second, limit);
|
||||
return true;
|
||||
}
|
||||
|
||||
void test_dataset(CPPFImdlp &test, const string &filename, vector<cutPoints_t> &expected,
|
||||
vector<int> &depths) const {
|
||||
ArffFiles file;
|
||||
file.load(data_path + filename + ".arff", true);
|
||||
vector<samples_t> &X = file.getX();
|
||||
labels_t &y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
test.fit(X[feature], y);
|
||||
EXPECT_EQ(test.get_depth(), depths[feature]);
|
||||
auto computed = test.getCutPoints();
|
||||
cout << "Feature " << feature << ": ";
|
||||
checkCutPoints(computed, expected[feature]);
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||
{
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorEmptyDataset) {
|
||||
X = samples_t();
|
||||
y = labels_t();
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have at least one element");
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
|
||||
{
|
||||
algorithm = 2;
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorDifferentSize) {
|
||||
X = {1, 2, 3};
|
||||
y = {1, 2};
|
||||
EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size");
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||
{
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2 };
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
|
||||
// fit() must reject out-of-range hyper-parameters: a min_length of 2 and a
// max_depth of 0 are each expected to raise invalid_argument with the
// corresponding message.
TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth) {
    X = {1, 2, 3};
    y = {1, 2, 3};
    CPPFImdlp shortLength(2, 10, 0);
    CPPFImdlp zeroDepth(3, 0, 0);
    EXPECT_THROW_WITH_MESSAGE(shortLength.fit(X, y), invalid_argument, "min_length must be greater than 2");
    EXPECT_THROW_WITH_MESSAGE(zeroDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
}
|
||||
TEST_F(TestFImdlp, SortIndices)
|
||||
{
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
|
||||
// fit() must reject an invalid proposed number of cut points: both a negative
// value (-1) and an excessive one (200) are expected to raise invalid_argument
// with the same message.
TEST_F(TestFImdlp, FitErrorMaxCutPoints) {
    X = {1, 2, 3};
    y = {1, 2, 3};
    CPPFImdlp negativeCuts(2, 10, -1);
    CPPFImdlp excessiveCuts(3, 0, 200);
    EXPECT_THROW_WITH_MESSAGE(negativeCuts.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
    EXPECT_THROW_WITH_MESSAGE(excessiveCuts.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
}
|
||||
|
||||
TEST_F(TestFImdlp, SortIndices) {
|
||||
X = {5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f};
|
||||
y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
||||
indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7};
|
||||
checkSortedVector();
|
||||
X = { 5.77, 5.88, 5.99 };
|
||||
y = { 1, 2, 1 };
|
||||
indices = { 0, 1, 2 };
|
||||
X = {5.77f, 5.88f, 5.99f};
|
||||
y = {1, 2, 1};
|
||||
indices = {0, 1, 2};
|
||||
checkSortedVector();
|
||||
X = { 5.33, 5.22, 5.11 };
|
||||
y = { 1, 2, 1 };
|
||||
indices = { 2, 1, 0 };
|
||||
X = {5.33f, 5.22f, 5.11f};
|
||||
y = {1, 2, 1};
|
||||
indices = {2, 1, 0};
|
||||
checkSortedVector();
|
||||
X = { 5.33, 5.22, 5.33 };
|
||||
y = { 2, 2, 1 };
|
||||
indices = { 1, 2, 0 };
|
||||
X = {5.33f, 5.22f, 5.33f};
|
||||
y = {2, 2, 1};
|
||||
indices = {1, 2, 0};
|
||||
}
|
||||
TEST_F(TestFImdlp, TestArtificialDatasetAlternative)
|
||||
{
|
||||
algorithm = 1;
|
||||
|
||||
TEST_F(TestFImdlp, TestShortDatasets) {
|
||||
vector<precision_t> computed;
|
||||
X = {1};
|
||||
y = {1};
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), 0);
|
||||
X = {1, 3};
|
||||
y = {1, 2};
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 0);
|
||||
X = {2, 4};
|
||||
y = {1, 2};
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 0);
|
||||
X = {1, 2, 3};
|
||||
y = {1, 2, 2};
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 1);
|
||||
EXPECT_NEAR(computed[0], 1.5, precision);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, TestArtificialDataset) {
|
||||
fit(X, y);
|
||||
cutPoints_t expected = {5.05f};
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||
{
|
||||
algorithm = 0;
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, TestIris)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
TEST_F(TestFImdlp, TestIris) {
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 6.25 },
|
||||
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
|
||||
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
|
||||
{5.45f, 5.75f},
|
||||
{2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
|
||||
{2.45f, 4.75f, 5.05f},
|
||||
{0.8f, 1.75f}
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 0;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
}
|
||||
vector<int> depths = {3, 5, 4, 3};
|
||||
auto test = CPPFImdlp();
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
TEST_F(TestFImdlp, TestIrisAlternative)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
// Small hand-made sample with repeated values at the class boundary:
// fitting it is expected to yield the single cut point 1.5.
TEST_F(TestFImdlp, ComputeCutPointsGCase) {
    samples_t samples = {0, 1, 2, 2, 2};
    labels_t labels = {1, 1, 1, 2, 2};
    cutPoints_t expected = {1.5};
    fit(samples, labels);
    auto computed = getCutPoints();
    checkCutPoints(computed, expected);
}
|
||||
|
||||
// Exercises valueCutPoint() through the test_result() helper on eight
// ten-element samples. Each case gives the candidate cut index, the expected
// mid point and the expected limit index.
TEST_F(TestFImdlp, ValueCutPoint) {
    struct Scenario {
        const char *title;   // Case titles as stated in the doc
        samples_t values;
        size_t cut;
        float midPoint;
        size_t limit;
    };
    const vector<Scenario> scenarios = {
            {"1a", {3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f}, 6, 7.3f / 2, 6},
            {"2a", {3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f}, 6, 7.1f / 2, 4},
            {"2b", {3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f}, 6, 7.5f / 2, 7},
            {"3a", {3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f},  4, 7.1f / 2, 4},
            {"3b", {3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f}, 4, 7.1f / 2, 4},
            {"4a", {3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f}, 4, 6.9f / 2, 2},
            {"4b", {3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f}, 4, 7.5f / 2, 7},
            {"4c", {3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f}, 4, 6.9f / 2, 2}
    };
    for (const auto &scenario: scenarios) {
        test_result(scenario.values, scenario.cut, scenario.midPoint, scenario.limit, scenario.title);
    }
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxDepth) {
|
||||
// Set max_depth to 1
|
||||
auto test = CPPFImdlp(3, 1, 0);
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 5.75 },
|
||||
{ 2.8499999046325684, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75 },
|
||||
{ 0.80000001192092896, 1.75 }
|
||||
{5.45f},
|
||||
{3.35f},
|
||||
{2.45f},
|
||||
{0.8f}
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 1;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
vector<int> depths = {1, 1, 1, 1};
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
|
||||
// Iris discretized with min_length = 75 (max_depth 100, max_cutpoints 0):
// checks the cut points and the depth reached for every feature.
TEST_F(TestFImdlp, MinLength) {
    vector<cutPoints_t> expected = {
            {5.45f, 5.75f},
            {2.85f, 3.35f},
            {2.45f, 4.75f},
            {0.8f,  1.75f}
    };
    vector<int> depths = {3, 2, 2, 2};
    CPPFImdlp discretizer(75, 100, 0);
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
// Iris discretized with min_length = 75 and max_depth = 2 (max_cutpoints 0):
// checks the cut points and the depth reached for every feature.
TEST_F(TestFImdlp, MinLengthMaxDepth) {
    vector<cutPoints_t> expected = {
            {5.45f, 5.75f},
            {2.85f, 3.35f},
            {2.45f, 4.75f},
            {0.8f,  1.75f}
    };
    vector<int> depths = {2, 2, 2, 2};
    CPPFImdlp discretizer(75, 2, 0);
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
// Iris discretized with min_length = 75, max_depth = 2 and max_cutpoints = 1:
// a single cut point and a depth of 1 are expected for every feature.
TEST_F(TestFImdlp, MaxCutPointsInteger) {
    vector<cutPoints_t> expected = {
            {5.45f},
            {3.35f},
            {2.45f},
            {0.8f}
    };
    vector<int> depths = {1, 1, 1, 1};
    CPPFImdlp discretizer(75, 2, 1);
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
// Iris discretized with min_length = 75, max_depth = 2 and max_cutpoints = 0.2
// (presumably read as a fraction of the number of samples -- TODO confirm):
// checks the cut points and the depth reached for every feature.
TEST_F(TestFImdlp, MaxCutPointsFloat) {
    vector<cutPoints_t> expected = {
            {5.45f, 5.75f},
            {2.85f, 3.35f},
            {2.45f, 4.75f},
            {0.8f,  1.75f}
    };
    vector<int> depths = {2, 2, 2, 2};
    CPPFImdlp discretizer(75, 2, 0.2f);
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
// For each (proposed_cuts, expected) pair, sets proposed_cuts on the fixture
// and verifies that compute_max_num_cut_points() returns the expected count.
// The run stops at the first mismatch (ASSERT).
TEST_F(TestFImdlp, ProposedCuts) {
    const vector<pair<float, size_t>> scenarios = {
            {0.1f,  2},
            {0.5f,  10},
            {0.07f, 1},
            {1.0f,  1},
            {2.0f,  2}
    };
    for (const auto &scenario: scenarios) {
        proposed_cuts = scenario.first;
        const size_t expected = scenario.second;
        const size_t computed = compute_max_num_cut_points();
        ASSERT_EQ(expected, computed);
    }
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
algorithm = 0;
|
||||
expected = { 1.5 };
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
expected = { 1.5 };
|
||||
algorithm = true;
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -1,43 +1,40 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
|
||||
|
||||
namespace mdlp {
|
||||
class TestMetrics: public Metrics, public testing::Test {
|
||||
class TestMetrics : public Metrics, public testing::Test {
|
||||
public:
|
||||
labels_t y;
|
||||
samples_t X;
|
||||
indices_t indices;
|
||||
precision_t precision = 0.000001;
|
||||
labels_t y_ = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
|
||||
indices_t indices_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
||||
precision_t precision = 0.000001f;
|
||||
|
||||
TestMetrics(): Metrics(y, indices) {}
|
||||
void SetUp()
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
setData(y, indices);
|
||||
TestMetrics() : Metrics(y_, indices_) {};
|
||||
|
||||
void SetUp() override {
|
||||
setData(y_, indices_);
|
||||
}
|
||||
};
|
||||
TEST_F(TestMetrics, NumClasses)
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
|
||||
// computeNumClasses(start, end) must count the distinct labels found in the
// half-open index range: only label 1 in [4, 8), both labels in [0, 10) and [8, 10).
TEST_F(TestMetrics, NumClasses) {
    y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
    const int singleClass = 1;
    const int bothClasses = 2;
    EXPECT_EQ(singleClass, computeNumClasses(4, 8));
    EXPECT_EQ(bothClasses, computeNumClasses(0, 10));
    EXPECT_EQ(bothClasses, computeNumClasses(8, 10));
}
|
||||
TEST_F(TestMetrics, Entropy)
|
||||
{
|
||||
|
||||
TEST_F(TestMetrics, Entropy) {
|
||||
EXPECT_EQ(1, entropy(0, 10));
|
||||
EXPECT_EQ(0, entropy(0, 5));
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.468996, entropy(0, 10), precision);
|
||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||
}
|
||||
TEST_F(TestMetrics, InformationGain)
|
||||
{
|
||||
|
||||
TEST_F(TestMetrics, InformationGain) {
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
||||
y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
|
||||
}
|
||||
}
|
||||
|
@@ -1,4 +0,0 @@
|
||||
rm -fr lcoverage/*
|
||||
lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||
genhtml lcoverage/main_coverage.info --output-directory lcoverage
|
||||
open lcoverage/index.html
|
332
tests/datasets/glass.arff
Executable file
332
tests/datasets/glass.arff
Executable file
@@ -0,0 +1,332 @@
|
||||
% 1. Title: Glass Identification Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: B. German
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% (b) Donor: Vina Spiehler, Ph.D., DABFT
|
||||
% Diagnostic Products Corporation
|
||||
% (213) 776-0180 (ext 3014)
|
||||
% (c) Date: September, 1987
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% -- Rule Induction in Forensic Science
|
||||
% -- Ian W. Evett and Ernest J. Spiehler
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% -- Unknown technical note number (sorry, not listed here)
|
||||
% -- General Results: nearest neighbor held its own with respect to the
|
||||
% rule-based system
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
|
||||
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
|
||||
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
|
||||
% Suite 206; Marina Del Ray, CA 90292 (213) 827-7890 and FAX: -3189.
|
||||
% In determining whether the glass was a type of "float" glass or not,
|
||||
% the following results were obtained (# incorrect answers):
|
||||
%
|
||||
% Type of Sample Beagle NN DA
|
||||
% Windows that were float processed (87) 10 12 21
|
||||
% Windows that were not: (76) 19 16 22
|
||||
%
|
||||
% The study of classification of types of glass was motivated by
|
||||
% criminological investigation. At the scene of the crime, the glass left
|
||||
% can be used as evidence...if it is correctly identified!
|
||||
%
|
||||
% 5. Number of Instances: 214
|
||||
%
|
||||
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
|
||||
% -- all attributes are continuously valued
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. Id number: 1 to 214
|
||||
% 2. RI: refractive index
|
||||
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
|
||||
% are attributes 4-10)
|
||||
% 4. Mg: Magnesium
|
||||
% 5. Al: Aluminum
|
||||
% 6. Si: Silicon
|
||||
% 7. K: Potassium
|
||||
% 8. Ca: Calcium
|
||||
% 9. Ba: Barium
|
||||
% 10. Fe: Iron
|
||||
% 11. Type of glass: (class attribute)
|
||||
% -- 1 building_windows_float_processed
|
||||
% -- 2 building_windows_non_float_processed
|
||||
% -- 3 vehicle_windows_float_processed
|
||||
% -- 4 vehicle_windows_non_float_processed (none in this database)
|
||||
% -- 5 containers
|
||||
% -- 6 tableware
|
||||
% -- 7 headlamps
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Attribute: Min Max Mean SD Correlation with class
|
||||
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
|
||||
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
|
||||
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
|
||||
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
|
||||
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
|
||||
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
|
||||
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
|
||||
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
|
||||
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
|
||||
%
|
||||
% 9. Class Distribution: (out of 214 total instances)
|
||||
% -- 163 Window glass (building windows and vehicle windows)
|
||||
% -- 87 float processed
|
||||
% -- 70 building windows
|
||||
% -- 17 vehicle windows
|
||||
% -- 76 non-float processed
|
||||
% -- 76 building windows
|
||||
% -- 0 vehicle windows
|
||||
% -- 51 Non-window glass
|
||||
% -- 13 containers
|
||||
% -- 9 tableware
|
||||
% -- 29 headlamps
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
% Relabeled values in attribute 'Type'
|
||||
% From: '1' To: 'build wind float'
|
||||
% From: '2' To: 'build wind non-float'
|
||||
% From: '3' To: 'vehic wind float'
|
||||
% From: '4' To: 'vehic wind non-float'
|
||||
% From: '5' To: containers
|
||||
% From: '6' To: tableware
|
||||
% From: '7' To: headlamps
|
||||
%
|
||||
@relation Glass
|
||||
@attribute 'RI' real
|
||||
@attribute 'Na' real
|
||||
@attribute 'Mg' real
|
||||
@attribute 'Al' real
|
||||
@attribute 'Si' real
|
||||
@attribute 'K' real
|
||||
@attribute 'Ca' real
|
||||
@attribute 'Ba' real
|
||||
@attribute 'Fe' real
|
||||
@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
|
||||
@data
|
||||
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
|
||||
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
|
||||
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
|
||||
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
|
||||
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
|
||||
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
|
||||
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
|
||||
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
|
||||
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
|
||||
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
|
||||
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
|
||||
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
|
||||
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
|
||||
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
|
||||
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
|
||||
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
|
||||
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
|
||||
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
|
||||
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
|
||||
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
|
||||
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
|
||||
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
|
||||
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
|
||||
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
|
||||
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
|
||||
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
|
||||
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
|
||||
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
|
||||
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
|
||||
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
|
||||
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
|
||||
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
|
||||
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
|
||||
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
|
||||
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
|
||||
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
|
||||
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
|
||||
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
|
||||
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
|
||||
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
|
||||
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
|
||||
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
|
||||
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
|
||||
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
|
||||
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
|
||||
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
|
||||
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
|
||||
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
|
||||
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
|
||||
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
|
||||
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
|
||||
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
|
||||
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
|
||||
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
|
||||
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
|
||||
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
|
||||
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
|
||||
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
|
||||
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
|
||||
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
|
||||
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
|
||||
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
|
||||
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
|
||||
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
|
||||
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
|
||||
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
|
||||
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
|
||||
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
|
||||
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
|
||||
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
|
||||
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
|
||||
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
|
||||
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
|
||||
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
|
||||
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
|
||||
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
|
||||
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
|
||||
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
|
||||
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
|
||||
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
|
||||
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
|
||||
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
|
||||
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
|
||||
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
|
||||
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
|
||||
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
|
||||
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
|
||||
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
|
||||
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
|
||||
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
|
||||
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
|
||||
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
|
||||
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
|
||||
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
|
||||
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
|
||||
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
|
||||
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
|
||||
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
|
||||
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
|
||||
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
|
||||
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
|
||||
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
|
||||
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
|
||||
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
|
||||
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
|
||||
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
|
||||
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
|
||||
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
|
||||
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
|
||||
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
|
||||
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
|
||||
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
|
||||
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
|
||||
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
|
||||
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
|
||||
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
|
||||
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
|
||||
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
|
||||
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
|
||||
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
|
||||
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
|
||||
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
|
||||
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
|
||||
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
|
||||
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
|
||||
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
|
||||
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
|
||||
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
|
||||
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
|
||||
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
|
||||
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
|
||||
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
|
||||
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
|
||||
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
|
||||
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
|
||||
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
|
||||
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
|
||||
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
|
||||
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
|
||||
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
|
||||
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
|
||||
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
|
||||
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
|
||||
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
|
||||
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
|
||||
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
|
||||
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
|
||||
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
|
||||
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
|
||||
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
|
||||
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
|
||||
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
|
||||
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
|
||||
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
|
||||
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
|
||||
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
|
||||
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
|
||||
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
|
||||
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
|
||||
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
|
||||
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
|
||||
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
|
||||
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
|
||||
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
|
||||
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
|
||||
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
|
||||
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
|
||||
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
|
||||
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
|
||||
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
|
||||
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
|
||||
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
|
||||
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
|
||||
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
|
||||
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
|
||||
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
|
||||
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
|
||||
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
|
||||
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
|
||||
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
|
||||
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
|
||||
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
|
||||
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
|
||||
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
|
||||
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
|
||||
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
|
||||
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
|
||||
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
|
||||
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
|
||||
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
|
||||
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
|
||||
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
|
||||
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
|
||||
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
|
||||
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
|
||||
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
|
||||
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
|
||||
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
|
||||
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
|
||||
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
|
||||
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
|
||||
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
|
||||
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
|
||||
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
|
||||
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
|
||||
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware
|
399
tests/datasets/liver-disorders.arff
Executable file
399
tests/datasets/liver-disorders.arff
Executable file
@@ -0,0 +1,399 @@
|
||||
% 1. Title: BUPA liver disorders
|
||||
%
|
||||
% 2. Source information:
|
||||
% -- Creators: BUPA Medical Research Ltd.
|
||||
% -- Donor: Richard S. Forsyth
|
||||
% 8 Grosvenor Avenue
|
||||
% Mapperley Park
|
||||
% Nottingham NG3 5DX
|
||||
% 0602-621676
|
||||
% -- Date: 5/15/1990
|
||||
%
|
||||
% 3. Past usage:
|
||||
% -- None known other than what is shown in the PC/BEAGLE User's Guide
|
||||
% (written by Richard S. Forsyth).
|
||||
%
|
||||
% 4. Relevant information:
|
||||
% -- The first 5 variables are all blood tests which are thought
|
||||
% to be sensitive to liver disorders that might arise from
|
||||
% excessive alcohol consumption. Each line in the bupa.data file
|
||||
% constitutes the record of a single male individual.
|
||||
% -- It appears that drinks>5 is some sort of a selector on this database.
|
||||
% See the PC/BEAGLE User's Guide for more information.
|
||||
%
|
||||
% 5. Number of instances: 345
|
||||
%
|
||||
% 6. Number of attributes: 7 overall
|
||||
%
|
||||
% 7. Attribute information:
|
||||
% 1. mcv mean corpuscular volume
|
||||
% 2. alkphos alkaline phosphotase
|
||||
% 3. sgpt alamine aminotransferase
|
||||
% 4. sgot aspartate aminotransferase
|
||||
% 5. gammagt gamma-glutamyl transpeptidase
|
||||
% 6. drinks number of half-pint equivalents of alcoholic beverages
|
||||
% drunk per day
|
||||
% 7. selector field used to split data into two sets
|
||||
%
|
||||
% 8. Missing values: none%
|
||||
% Information about the dataset
|
||||
% CLASSTYPE: nominal
|
||||
% CLASSINDEX: last
|
||||
%
|
||||
|
||||
@relation liver-disorders
|
||||
|
||||
@attribute mcv INTEGER
|
||||
@attribute alkphos INTEGER
|
||||
@attribute sgpt INTEGER
|
||||
@attribute sgot INTEGER
|
||||
@attribute gammagt INTEGER
|
||||
@attribute drinks REAL
|
||||
@attribute selector {1,2}
|
||||
|
||||
@data
|
||||
85,92,45,27,31,0.0,1
|
||||
85,64,59,32,23,0.0,2
|
||||
86,54,33,16,54,0.0,2
|
||||
91,78,34,24,36,0.0,2
|
||||
87,70,12,28,10,0.0,2
|
||||
98,55,13,17,17,0.0,2
|
||||
88,62,20,17,9,0.5,1
|
||||
88,67,21,11,11,0.5,1
|
||||
92,54,22,20,7,0.5,1
|
||||
90,60,25,19,5,0.5,1
|
||||
89,52,13,24,15,0.5,1
|
||||
82,62,17,17,15,0.5,1
|
||||
90,64,61,32,13,0.5,1
|
||||
86,77,25,19,18,0.5,1
|
||||
96,67,29,20,11,0.5,1
|
||||
91,78,20,31,18,0.5,1
|
||||
89,67,23,16,10,0.5,1
|
||||
89,79,17,17,16,0.5,1
|
||||
91,107,20,20,56,0.5,1
|
||||
94,116,11,33,11,0.5,1
|
||||
92,59,35,13,19,0.5,1
|
||||
93,23,35,20,20,0.5,1
|
||||
90,60,23,27,5,0.5,1
|
||||
96,68,18,19,19,0.5,1
|
||||
84,80,47,33,97,0.5,1
|
||||
92,70,24,13,26,0.5,1
|
||||
90,47,28,15,18,0.5,1
|
||||
88,66,20,21,10,0.5,1
|
||||
91,102,17,13,19,0.5,1
|
||||
87,41,31,19,16,0.5,1
|
||||
86,79,28,16,17,0.5,1
|
||||
91,57,31,23,42,0.5,1
|
||||
93,77,32,18,29,0.5,1
|
||||
88,96,28,21,40,0.5,1
|
||||
94,65,22,18,11,0.5,1
|
||||
91,72,155,68,82,0.5,2
|
||||
85,54,47,33,22,0.5,2
|
||||
79,39,14,19,9,0.5,2
|
||||
85,85,25,26,30,0.5,2
|
||||
89,63,24,20,38,0.5,2
|
||||
84,92,68,37,44,0.5,2
|
||||
89,68,26,39,42,0.5,2
|
||||
89,101,18,25,13,0.5,2
|
||||
86,84,18,14,16,0.5,2
|
||||
85,65,25,14,18,0.5,2
|
||||
88,61,19,21,13,0.5,2
|
||||
92,56,14,16,10,0.5,2
|
||||
95,50,29,25,50,0.5,2
|
||||
91,75,24,22,11,0.5,2
|
||||
83,40,29,25,38,0.5,2
|
||||
89,74,19,23,16,0.5,2
|
||||
85,64,24,22,11,0.5,2
|
||||
92,57,64,36,90,0.5,2
|
||||
94,48,11,23,43,0.5,2
|
||||
87,52,21,19,30,0.5,2
|
||||
85,65,23,29,15,0.5,2
|
||||
84,82,21,21,19,0.5,2
|
||||
88,49,20,22,19,0.5,2
|
||||
96,67,26,26,36,0.5,2
|
||||
90,63,24,24,24,0.5,2
|
||||
90,45,33,34,27,0.5,2
|
||||
90,72,14,15,18,0.5,2
|
||||
91,55,4,8,13,0.5,2
|
||||
91,52,15,22,11,0.5,2
|
||||
87,71,32,19,27,1.0,1
|
||||
89,77,26,20,19,1.0,1
|
||||
89,67,5,17,14,1.0,2
|
||||
85,51,26,24,23,1.0,2
|
||||
103,75,19,30,13,1.0,2
|
||||
90,63,16,21,14,1.0,2
|
||||
90,63,29,23,57,2.0,1
|
||||
90,67,35,19,35,2.0,1
|
||||
87,66,27,22,9,2.0,1
|
||||
90,73,34,21,22,2.0,1
|
||||
86,54,20,21,16,2.0,1
|
||||
90,80,19,14,42,2.0,1
|
||||
87,90,43,28,156,2.0,2
|
||||
96,72,28,19,30,2.0,2
|
||||
91,55,9,25,16,2.0,2
|
||||
95,78,27,25,30,2.0,2
|
||||
92,101,34,30,64,2.0,2
|
||||
89,51,41,22,48,2.0,2
|
||||
91,99,42,33,16,2.0,2
|
||||
94,58,21,18,26,2.0,2
|
||||
92,60,30,27,297,2.0,2
|
||||
94,58,21,18,26,2.0,2
|
||||
88,47,33,26,29,2.0,2
|
||||
92,65,17,25,9,2.0,2
|
||||
92,79,22,20,11,3.0,1
|
||||
84,83,20,25,7,3.0,1
|
||||
88,68,27,21,26,3.0,1
|
||||
86,48,20,20,6,3.0,1
|
||||
99,69,45,32,30,3.0,1
|
||||
88,66,23,12,15,3.0,1
|
||||
89,62,42,30,20,3.0,1
|
||||
90,51,23,17,27,3.0,1
|
||||
81,61,32,37,53,3.0,2
|
||||
89,89,23,18,104,3.0,2
|
||||
89,65,26,18,36,3.0,2
|
||||
92,75,26,26,24,3.0,2
|
||||
85,59,25,20,25,3.0,2
|
||||
92,61,18,13,81,3.0,2
|
||||
89,63,22,27,10,4.0,1
|
||||
90,84,18,23,13,4.0,1
|
||||
88,95,25,19,14,4.0,1
|
||||
89,35,27,29,17,4.0,1
|
||||
91,80,37,23,27,4.0,1
|
||||
91,109,33,15,18,4.0,1
|
||||
91,65,17,5,7,4.0,1
|
||||
88,107,29,20,50,4.0,2
|
||||
87,76,22,55,9,4.0,2
|
||||
87,86,28,23,21,4.0,2
|
||||
87,42,26,23,17,4.0,2
|
||||
88,80,24,25,17,4.0,2
|
||||
90,96,34,49,169,4.0,2
|
||||
86,67,11,15,8,4.0,2
|
||||
92,40,19,20,21,4.0,2
|
||||
85,60,17,21,14,4.0,2
|
||||
89,90,15,17,25,4.0,2
|
||||
91,57,15,16,16,4.0,2
|
||||
96,55,48,39,42,4.0,2
|
||||
79,101,17,27,23,4.0,2
|
||||
90,134,14,20,14,4.0,2
|
||||
89,76,14,21,24,4.0,2
|
||||
88,93,29,27,31,4.0,2
|
||||
90,67,10,16,16,4.0,2
|
||||
92,73,24,21,48,4.0,2
|
||||
91,55,28,28,82,4.0,2
|
||||
83,45,19,21,13,4.0,2
|
||||
90,74,19,14,22,4.0,2
|
||||
92,66,21,16,33,5.0,1
|
||||
93,63,26,18,18,5.0,1
|
||||
86,78,47,39,107,5.0,2
|
||||
97,44,113,45,150,5.0,2
|
||||
87,59,15,19,12,5.0,2
|
||||
86,44,21,11,15,5.0,2
|
||||
87,64,16,20,24,5.0,2
|
||||
92,57,21,23,22,5.0,2
|
||||
90,70,25,23,112,5.0,2
|
||||
99,59,17,19,11,5.0,2
|
||||
92,80,10,26,20,6.0,1
|
||||
95,60,26,22,28,6.0,1
|
||||
91,63,25,26,15,6.0,1
|
||||
92,62,37,21,36,6.0,1
|
||||
95,50,13,14,15,6.0,1
|
||||
90,76,37,19,50,6.0,1
|
||||
96,70,70,26,36,6.0,1
|
||||
95,62,64,42,76,6.0,1
|
||||
92,62,20,23,20,6.0,1
|
||||
91,63,25,26,15,6.0,1
|
||||
82,56,67,38,92,6.0,2
|
||||
92,82,27,24,37,6.0,2
|
||||
90,63,12,26,21,6.0,2
|
||||
88,37,9,15,16,6.0,2
|
||||
100,60,29,23,76,6.0,2
|
||||
98,43,35,23,69,6.0,2
|
||||
91,74,87,50,67,6.0,2
|
||||
92,87,57,25,44,6.0,2
|
||||
93,99,36,34,48,6.0,2
|
||||
90,72,17,19,19,6.0,2
|
||||
97,93,21,20,68,6.0,2
|
||||
93,50,18,25,17,6.0,2
|
||||
90,57,20,26,33,6.0,2
|
||||
92,76,31,28,41,6.0,2
|
||||
88,55,19,17,14,6.0,2
|
||||
89,63,24,29,29,6.0,2
|
||||
92,79,70,32,84,7.0,1
|
||||
92,93,58,35,120,7.0,1
|
||||
93,84,58,47,62,7.0,2
|
||||
97,71,29,22,52,8.0,1
|
||||
84,99,33,19,26,8.0,1
|
||||
96,44,42,23,73,8.0,1
|
||||
90,62,22,21,21,8.0,1
|
||||
92,94,18,17,6,8.0,1
|
||||
90,67,77,39,114,8.0,1
|
||||
97,71,29,22,52,8.0,1
|
||||
91,69,25,25,66,8.0,2
|
||||
93,59,17,20,14,8.0,2
|
||||
92,95,85,48,200,8.0,2
|
||||
90,50,26,22,53,8.0,2
|
||||
91,62,59,47,60,8.0,2
|
||||
92,93,22,28,123,9.0,1
|
||||
92,77,86,41,31,10.0,1
|
||||
86,66,22,24,26,10.0,2
|
||||
98,57,31,34,73,10.0,2
|
||||
95,80,50,64,55,10.0,2
|
||||
92,108,53,33,94,12.0,2
|
||||
97,92,22,28,49,12.0,2
|
||||
93,77,39,37,108,16.0,1
|
||||
94,83,81,34,201,20.0,1
|
||||
87,75,25,21,14,0.0,1
|
||||
88,56,23,18,12,0.0,1
|
||||
84,97,41,20,32,0.0,2
|
||||
94,91,27,20,15,0.5,1
|
||||
97,62,17,13,5,0.5,1
|
||||
92,85,25,20,12,0.5,1
|
||||
82,48,27,15,12,0.5,1
|
||||
88,74,31,25,15,0.5,1
|
||||
95,77,30,14,21,0.5,1
|
||||
88,94,26,18,8,0.5,1
|
||||
91,70,19,19,22,0.5,1
|
||||
83,54,27,15,12,0.5,1
|
||||
91,105,40,26,56,0.5,1
|
||||
86,79,37,28,14,0.5,1
|
||||
91,96,35,22,135,0.5,1
|
||||
89,82,23,14,35,0.5,1
|
||||
90,73,24,23,11,0.5,1
|
||||
90,87,19,25,19,0.5,1
|
||||
89,82,33,32,18,0.5,1
|
||||
85,79,17,8,9,0.5,1
|
||||
85,119,30,26,17,0.5,1
|
||||
78,69,24,18,31,0.5,1
|
||||
88,107,34,21,27,0.5,1
|
||||
89,115,17,27,7,0.5,1
|
||||
92,67,23,15,12,0.5,1
|
||||
89,101,27,34,14,0.5,1
|
||||
91,84,11,12,10,0.5,1
|
||||
94,101,41,20,53,0.5,2
|
||||
88,46,29,22,18,0.5,2
|
||||
88,122,35,29,42,0.5,2
|
||||
84,88,28,25,35,0.5,2
|
||||
90,79,18,15,24,0.5,2
|
||||
87,69,22,26,11,0.5,2
|
||||
65,63,19,20,14,0.5,2
|
||||
90,64,12,17,14,0.5,2
|
||||
85,58,18,24,16,0.5,2
|
||||
88,81,41,27,36,0.5,2
|
||||
86,78,52,29,62,0.5,2
|
||||
82,74,38,28,48,0.5,2
|
||||
86,58,36,27,59,0.5,2
|
||||
94,56,30,18,27,0.5,2
|
||||
87,57,30,30,22,0.5,2
|
||||
98,74,148,75,159,0.5,2
|
||||
94,75,20,25,38,0.5,2
|
||||
83,68,17,20,71,0.5,2
|
||||
93,56,25,21,33,0.5,2
|
||||
101,65,18,21,22,0.5,2
|
||||
92,65,25,20,31,0.5,2
|
||||
92,58,14,16,13,0.5,2
|
||||
86,58,16,23,23,0.5,2
|
||||
85,62,15,13,22,0.5,2
|
||||
86,57,13,20,13,0.5,2
|
||||
86,54,26,30,13,0.5,2
|
||||
81,41,33,27,34,1.0,1
|
||||
91,67,32,26,13,1.0,1
|
||||
91,80,21,19,14,1.0,1
|
||||
92,60,23,15,19,1.0,1
|
||||
91,60,32,14,8,1.0,1
|
||||
93,65,28,22,10,1.0,1
|
||||
90,63,45,24,85,1.0,2
|
||||
87,92,21,22,37,1.0,2
|
||||
83,78,31,19,115,1.0,2
|
||||
95,62,24,23,14,1.0,2
|
||||
93,59,41,30,48,1.0,2
|
||||
84,82,43,32,38,2.0,1
|
||||
87,71,33,20,22,2.0,1
|
||||
86,44,24,15,18,2.0,1
|
||||
86,66,28,24,21,2.0,1
|
||||
88,58,31,17,17,2.0,1
|
||||
90,61,28,29,31,2.0,1
|
||||
88,69,70,24,64,2.0,1
|
||||
93,87,18,17,26,2.0,1
|
||||
98,58,33,21,28,2.0,1
|
||||
91,44,18,18,23,2.0,2
|
||||
87,75,37,19,70,2.0,2
|
||||
94,91,30,26,25,2.0,2
|
||||
88,85,14,15,10,2.0,2
|
||||
89,109,26,25,27,2.0,2
|
||||
87,59,37,27,34,2.0,2
|
||||
93,58,20,23,18,2.0,2
|
||||
88,57,9,15,16,2.0,2
|
||||
94,65,38,27,17,3.0,1
|
||||
91,71,12,22,11,3.0,1
|
||||
90,55,20,20,16,3.0,1
|
||||
91,64,21,17,26,3.0,2
|
||||
88,47,35,26,33,3.0,2
|
||||
82,72,31,20,84,3.0,2
|
||||
85,58,83,49,51,3.0,2
|
||||
91,54,25,22,35,4.0,1
|
||||
98,50,27,25,53,4.0,2
|
||||
86,62,29,21,26,4.0,2
|
||||
89,48,32,22,14,4.0,2
|
||||
82,68,20,22,9,4.0,2
|
||||
83,70,17,19,23,4.0,2
|
||||
96,70,21,26,21,4.0,2
|
||||
94,117,77,56,52,4.0,2
|
||||
93,45,11,14,21,4.0,2
|
||||
93,49,27,21,29,4.0,2
|
||||
84,73,46,32,39,4.0,2
|
||||
91,63,17,17,46,4.0,2
|
||||
90,57,31,18,37,4.0,2
|
||||
87,45,19,13,16,4.0,2
|
||||
91,68,14,20,19,4.0,2
|
||||
86,55,29,35,108,4.0,2
|
||||
91,86,52,47,52,4.0,2
|
||||
88,46,15,33,55,4.0,2
|
||||
85,52,22,23,34,4.0,2
|
||||
89,72,33,27,55,4.0,2
|
||||
95,59,23,18,19,4.0,2
|
||||
94,43,154,82,121,4.0,2
|
||||
96,56,38,26,23,5.0,2
|
||||
90,52,10,17,12,5.0,2
|
||||
94,45,20,16,12,5.0,2
|
||||
99,42,14,21,49,5.0,2
|
||||
93,102,47,23,37,5.0,2
|
||||
94,71,25,26,31,5.0,2
|
||||
92,73,33,34,115,5.0,2
|
||||
87,54,41,29,23,6.0,1
|
||||
92,67,15,14,14,6.0,1
|
||||
98,101,31,26,32,6.0,1
|
||||
92,53,51,33,92,6.0,1
|
||||
97,94,43,43,82,6.0,1
|
||||
93,43,11,16,54,6.0,1
|
||||
93,68,24,18,19,6.0,1
|
||||
95,36,38,19,15,6.0,1
|
||||
99,86,58,42,203,6.0,1
|
||||
98,66,103,57,114,6.0,1
|
||||
92,80,10,26,20,6.0,1
|
||||
96,74,27,25,43,6.0,2
|
||||
95,93,21,27,47,6.0,2
|
||||
86,109,16,22,28,6.0,2
|
||||
91,46,30,24,39,7.0,2
|
||||
102,82,34,78,203,7.0,2
|
||||
85,50,12,18,14,7.0,2
|
||||
91,57,33,23,12,8.0,1
|
||||
91,52,76,32,24,8.0,1
|
||||
93,70,46,30,33,8.0,1
|
||||
87,55,36,19,25,8.0,1
|
||||
98,123,28,24,31,8.0,1
|
||||
82,55,18,23,44,8.0,2
|
||||
95,73,20,25,225,8.0,2
|
||||
97,80,17,20,53,8.0,2
|
||||
100,83,25,24,28,8.0,2
|
||||
88,91,56,35,126,9.0,2
|
||||
91,138,45,21,48,10.0,1
|
||||
92,41,37,22,37,10.0,1
|
||||
86,123,20,25,23,10.0,2
|
||||
91,93,35,34,37,10.0,2
|
||||
87,87,15,23,11,10.0,2
|
||||
87,56,52,43,55,10.0,2
|
||||
99,75,26,24,41,12.0,1
|
||||
96,69,53,43,203,12.0,2
|
||||
98,77,55,35,89,15.0,1
|
||||
91,68,27,26,14,16.0,1
|
||||
98,99,57,45,65,20.0,1
|
180
tests/datasets/test.arff
Executable file
180
tests/datasets/test.arff
Executable file
@@ -0,0 +1,180 @@
|
||||
% 1. Title: Test Feature extracted from Glass
|
||||
%
|
||||
|
||||
@RELATION test
|
||||
|
||||
@ATTRIBUTE Mg REAL
|
||||
@ATTRIBUTE Type {0,1,2,3,4,5,6}
|
||||
|
||||
@DATA
|
||||
3.5,0
|
||||
3.52,1
|
||||
1.74,2
|
||||
0.0,3
|
||||
2.85,3
|
||||
3.65,1
|
||||
2.84,0
|
||||
0.0,4
|
||||
3.9,3
|
||||
3.58,3
|
||||
3.25,3
|
||||
3.76,1
|
||||
3.45,1
|
||||
3.48,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
2.96,3
|
||||
3.65,0
|
||||
0.0,3
|
||||
3.74,3
|
||||
3.66,0
|
||||
1.61,5
|
||||
3.49,3
|
||||
3.52,3
|
||||
3.54,3
|
||||
3.53,1
|
||||
3.56,3
|
||||
3.34,1
|
||||
0.0,2
|
||||
3.61,3
|
||||
3.6,0
|
||||
3.46,0
|
||||
2.72,3
|
||||
3.51,3
|
||||
3.09,3
|
||||
3.48,0
|
||||
3.8,3
|
||||
3.58,3
|
||||
3.54,1
|
||||
3.42,0
|
||||
2.68,5
|
||||
3.49,0
|
||||
3.68,3
|
||||
3.6,0
|
||||
3.59,3
|
||||
0.0,4
|
||||
3.54,0
|
||||
2.2,4
|
||||
3.59,0
|
||||
3.66,1
|
||||
3.87,3
|
||||
3.45,0
|
||||
3.82,0
|
||||
3.72,0
|
||||
3.33,0
|
||||
3.78,1
|
||||
2.24,2
|
||||
3.83,3
|
||||
0.78,2
|
||||
3.9,3
|
||||
3.58,0
|
||||
3.57,3
|
||||
3.52,0
|
||||
3.47,0
|
||||
3.48,0
|
||||
3.66,0
|
||||
3.62,0
|
||||
3.39,0
|
||||
0.0,3
|
||||
3.55,0
|
||||
1.01,3
|
||||
0.0,4
|
||||
3.43,0
|
||||
3.58,0
|
||||
0.0,4
|
||||
3.74,0
|
||||
0.0,4
|
||||
3.44,3
|
||||
3.97,3
|
||||
3.6,0
|
||||
3.64,3
|
||||
1.83,4
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
3.73,0
|
||||
3.58,1
|
||||
3.34,4
|
||||
2.09,3
|
||||
2.71,0
|
||||
3.18,3
|
||||
3.43,0
|
||||
3.15,3
|
||||
3.56,0
|
||||
0.0,4
|
||||
0.0,4
|
||||
4.49,0
|
||||
3.59,0
|
||||
3.56,3
|
||||
3.52,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
3.61,0
|
||||
3.74,0
|
||||
3.62,3
|
||||
3.84,0
|
||||
3.67,0
|
||||
3.58,0
|
||||
0.0,5
|
||||
3.66,3
|
||||
3.68,3
|
||||
2.28,3
|
||||
3.67,3
|
||||
3.2,4
|
||||
3.81,0
|
||||
0.0,3
|
||||
3.39,1
|
||||
3.57,0
|
||||
1.85,5
|
||||
3.75,0
|
||||
3.76,3
|
||||
0.0,3
|
||||
3.86,0
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,3
|
||||
3.5,0
|
||||
3.67,3
|
||||
3.55,3
|
||||
0.0,4
|
||||
3.61,3
|
||||
3.41,3
|
||||
3.7,0
|
||||
0.0,4
|
||||
3.58,0
|
||||
1.78,4
|
||||
3.85,0
|
||||
3.48,0
|
||||
1.71,5
|
||||
0.0,2
|
||||
3.5,3
|
||||
3.49,3
|
||||
3.36,1
|
||||
0.0,3
|
||||
3.54,0
|
||||
0.0,4
|
||||
2.87,0
|
||||
3.57,1
|
||||
3.48,0
|
||||
2.81,0
|
||||
0.0,4
|
||||
0.0,2
|
||||
3.62,3
|
||||
3.58,3
|
||||
3.45,3
|
||||
3.48,3
|
||||
3.54,3
|
||||
3.54,3
|
||||
3.37,0
|
||||
3.41,1
|
||||
3.48,0
|
||||
3.43,3
|
||||
3.5,0
|
||||
0.0,4
|
||||
3.54,0
|
||||
3.52,3
|
||||
3.52,3
|
||||
2.88,3
|
||||
2.19,2
|
24
tests/test
24
tests/test
@@ -1,12 +1,20 @@
|
||||
if [ -d build ] ; then
|
||||
rm -fr build
|
||||
fi
|
||||
if [ -d gcovr-report ] ; then
|
||||
rm -fr gcovr-report
|
||||
fi
|
||||
cmake -S . -B build -Wno-dev
|
||||
if test $? -ne 0; then
|
||||
echo "Error in creating build commands."
|
||||
exit 1
|
||||
fi
|
||||
cmake --build build
|
||||
if test $? -ne 0; then
|
||||
echo "Error in build command."
|
||||
exit 1
|
||||
fi
|
||||
cd build
|
||||
ctest --output-on-failure
|
||||
cd ..
|
||||
if [ ! -d gcovr-report ] ; then
|
||||
mkdir gcovr-report
|
||||
fi
|
||||
rm -fr gcovr-report/* 2>/dev/null
|
||||
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
|
||||
#lcov --list lcoverage/main_coverage.info
|
||||
cd ..
|
||||
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml
|
||||
|
@@ -2,6 +2,7 @@
|
||||
#define TYPES_H
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
@@ -10,7 +11,7 @@ namespace mdlp {
|
||||
typedef vector<int> labels_t;
|
||||
typedef vector<size_t> indices_t;
|
||||
typedef vector<precision_t> cutPoints_t;
|
||||
typedef map<tuple<int, int>, precision_t> cacheEnt_t;
|
||||
typedef map<pair<int, int>, precision_t> cacheEnt_t;
|
||||
typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user