Merge pull request #5 from rmontanana/hiperparameters

-Fix a big mistake in sortIndices method (removed unneeded loop)

-Add three hyperparameters to the algorithm:
 * max_depth: maximum level of recursion when looking for cut point candidates.
 * min_length: minimum length of the interval of samples to be searched for candidates.
 * max_cut: maximum number of cut points. It can be specified in two ways: as a natural number, meaning the maximum number of cut points for each feature of the dataset, or as a number in the range (0, 1), meaning a proportion of the number of samples.
This commit is contained in:
Ricardo Montañana Gómez
2023-04-01 19:05:12 +02:00
committed by GitHub
23 changed files with 1121 additions and 389 deletions

View File

@@ -3,7 +3,7 @@ on:
push: push:
branches: branches:
- main - main
- test - "*"
pull_request: pull_request:
types: [ opened, synchronize, reopened ] types: [ opened, synchronize, reopened ]
jobs: jobs:
@@ -13,25 +13,29 @@ jobs:
env: env:
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3.2.0
with: with:
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
- name: Install sonar-scanner and build-wrapper - name: Install sonar-scanner and build-wrapper
uses: SonarSource/sonarcloud-github-c-cpp@v1 uses: SonarSource/sonarcloud-github-c-cpp@v1
- name: Install lcov & gcovr
run: |
sudo apt-get -y install lcov
sudo apt-get -y install gcovr
- name: Tests & build-wrapper - name: Tests & build-wrapper
run: | run: |
mkdir build
cmake -S . -B build -Wno-dev
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
cd tests
mkdir build
cmake -S . -B build -Wno-dev cmake -S . -B build -Wno-dev
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
cd build cd build
ctest -C Release --output-on-failure make
ctest -C Release --output-on-failure --test-dir tests
cd ..
# gcovr -f CPPFImdlp.cpp -f Metrics.cpp --merge-mode-functions=separate --txt --sonarqube=coverage.xml
gcovr -f CPPFImdlp.cpp -f Metrics.cpp --txt --sonarqube=coverage.xml
- name: Run sonar-scanner - name: Run sonar-scanner
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
run: | run: |
sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
--define sonar.coverageReportPaths=coverage.xml

1
.gitignore vendored
View File

@@ -36,3 +36,4 @@
cmake-* cmake-*
**/CMakeFiles **/CMakeFiles
.vscode/* .vscode/*
**/gcovr-report

8
.vscode/launch.json vendored
View File

@@ -5,12 +5,14 @@
"version": "0.2.0", "version": "0.2.0",
"configurations": [ "configurations": [
{ {
"name": "(lldb) Launch", "name": "lldb samplex",
"type": "cppdbg", "type": "lldb",
"request": "launch", "request": "launch",
"targetArchitecture": "arm64",
"program": "${workspaceRoot}/sample/build/sample", "program": "${workspaceRoot}/sample/build/sample",
"args": [ "args": [
"mfeat-factors" "-f",
"glass"
], ],
"stopAtEntry": false, "stopAtEntry": false,
"cwd": "${workspaceRoot}/sample/build/", "cwd": "${workspaceRoot}/sample/build/",

View File

@@ -1,6 +1,9 @@
{ {
"sonarlint.connectedMode.project": { "sonarlint.connectedMode.project": {
"connectionId": "sonarcloud", "connectionId": "rmontanana",
"projectKey": "rmontanana_mdlp" "projectKey": "rmontanana_mdlp"
} },
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
"cmake.configureOnOpen": true,
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
} }

29
.vscode/tasks.json vendored
View File

@@ -1,29 +0,0 @@
{
"tasks": [
{
"type": "cppbuild",
"label": "C/C++: clang++ build active file",
"command": "/usr/bin/clang++",
"args": [
"-fcolor-diagnostics",
"-fansi-escape-codes",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
}
],
"version": "2.0.0"
}

View File

@@ -1,7 +1,13 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(mdlp) project(mdlp)
if (POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif ()
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
add_library(mdlp CPPFImdlp.cpp Metrics.cpp) add_library(mdlp CPPFImdlp.cpp Metrics.cpp sample/sample.cpp)
add_subdirectory(sample)
add_subdirectory(tests)

View File

@@ -2,22 +2,38 @@
#include <algorithm> #include <algorithm>
#include <set> #include <set>
#include <cmath> #include <cmath>
#include <limits>
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include "Metrics.h" #include "Metrics.h"
namespace mdlp { namespace mdlp {
CPPFImdlp::CPPFImdlp(): indices(indices_t()), X(samples_t()), y(labels_t()), CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
metrics(Metrics(y, indices)) max_depth(max_depth_),
{ proposed_cuts(proposed) {
} }
CPPFImdlp::CPPFImdlp() = default;
CPPFImdlp::~CPPFImdlp() = default; CPPFImdlp::~CPPFImdlp() = default;
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_) size_t CPPFImdlp::compute_max_num_cut_points() const {
{ // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
if (proposed_cuts == 0) {
return numeric_limits<size_t>::max();
}
if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
throw invalid_argument("wrong proposed num_cuts value");
}
if (proposed_cuts < 1)
return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
return static_cast<size_t>(proposed_cuts);
}
void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
X = X_; X = X_;
y = y_; y = y_;
num_cut_points = compute_max_num_cut_points();
depth = 0;
cutPoints.clear(); cutPoints.clear();
if (X.size() != y.size()) { if (X.size() != y.size()) {
throw invalid_argument("X and y must have the same size"); throw invalid_argument("X and y must have the same size");
@@ -25,18 +41,26 @@ namespace mdlp {
if (X.empty() || y.empty()) { if (X.empty() || y.empty()) {
throw invalid_argument("X and y must have at least one element"); throw invalid_argument("X and y must have at least one element");
} }
if (min_length < 3) {
throw invalid_argument("min_length must be greater than 2");
}
if (max_depth < 1) {
throw invalid_argument("max_depth must be greater than 0");
}
indices = sortIndices(X_, y_); indices = sortIndices(X_, y_);
metrics.setData(y, indices); metrics.setData(y, indices);
computeCutPoints(0, X.size()); computeCutPoints(0, X.size(), 1);
return *this;
} }
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
{ size_t n;
size_t n, m, idxPrev = cut - 1 >= start ? cut - 1 : cut; size_t m;
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
size_t idxNext = cut + 1 < end ? cut + 1 : cut; size_t idxNext = cut + 1 < end ? cut + 1 : cut;
bool backWall; // true if duplicates reach begining of the interval bool backWall; // true if duplicates reach beginning of the interval
precision_t previous, actual, next; precision_t previous;
precision_t actual;
precision_t next;
previous = X[indices[idxPrev]]; previous = X[indices[idxPrev]];
actual = X[indices[cut]]; actual = X[indices[cut]];
next = X[indices[idxNext]]; next = X[indices[idxNext]];
@@ -60,12 +84,15 @@ namespace mdlp {
return {(actual + previous) / 2, cut}; return {(actual + previous) / 2, cut};
} }
void CPPFImdlp::computeCutPoints(size_t start, size_t end) void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
{
size_t cut; size_t cut;
pair<precision_t, size_t> result; pair<precision_t, size_t> result;
if (end - start < 3) if (cutPoints.size() == num_cut_points)
return; return;
// Check if the interval length and the depth are Ok
if (end - start < min_length || depth_ > max_depth)
return;
depth = depth_ > depth ? depth_ : depth;
cut = getCandidate(start, end); cut = getCandidate(start, end);
if (cut == numeric_limits<size_t>::max()) if (cut == numeric_limits<size_t>::max())
return; return;
@@ -73,18 +100,20 @@ namespace mdlp {
result = valueCutPoint(start, cut, end); result = valueCutPoint(start, cut, end);
cut = result.second; cut = result.second;
cutPoints.push_back(result.first); cutPoints.push_back(result.first);
computeCutPoints(start, cut); computeCutPoints(start, cut, depth_ + 1);
computeCutPoints(cut, end); computeCutPoints(cut, end, depth_ + 1);
} }
} }
size_t CPPFImdlp::getCandidate(size_t start, size_t end) size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
{
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
E(A, TA; S) is minimal amongst all the candidate cut points. */ E(A, TA; S) is minimal amongst all the candidate cut points. */
size_t candidate = numeric_limits<size_t>::max(), elements = end - start; size_t candidate = numeric_limits<size_t>::max();
size_t elements = end - start;
bool sameValues = true; bool sameValues = true;
precision_t entropy_left, entropy_right, minEntropy; precision_t entropy_left;
precision_t entropy_right;
precision_t minEntropy;
// Check if all the values of the variable in the interval are the same // Check if all the values of the variable in the interval are the same
for (size_t idx = start + 1; idx < end; idx++) { for (size_t idx = start + 1; idx < end; idx++) {
if (X[indices[idx]] != X[indices[start]]) { if (X[indices[idx]] != X[indices[start]]) {
@@ -99,8 +128,8 @@ namespace mdlp {
// Cutpoints are always on boundaries (definition 2) // Cutpoints are always on boundaries (definition 2)
if (y[indices[idx]] == y[indices[idx - 1]]) if (y[indices[idx]] == y[indices[idx - 1]])
continue; continue;
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx); entropy_left = precision_t(idx - start) / static_cast<float>(elements) * metrics.entropy(start, idx);
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end); entropy_right = precision_t(end - idx) / static_cast<float>(elements) * metrics.entropy(idx, end);
if (entropy_left + entropy_right < minEntropy) { if (entropy_left + entropy_right < minEntropy) {
minEntropy = entropy_left + entropy_right; minEntropy = entropy_left + entropy_right;
candidate = idx; candidate = idx;
@@ -109,15 +138,16 @@ namespace mdlp {
return candidate; return candidate;
} }
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
{ int k;
int k, k1, k2; int k1;
precision_t ig, delta; int k2;
precision_t ent, ent1, ent2; precision_t ig;
precision_t delta;
precision_t ent;
precision_t ent1;
precision_t ent2;
auto N = precision_t(end - start); auto N = precision_t(end - start);
if (N < 2) {
return false;
}
k = metrics.computeNumClasses(start, end); k = metrics.computeNumClasses(start, end);
k1 = metrics.computeNumClasses(start, cut); k1 = metrics.computeNumClasses(start, cut);
k2 = metrics.computeNumClasses(cut, end); k2 = metrics.computeNumClasses(cut, end);
@@ -125,18 +155,16 @@ namespace mdlp {
ent1 = metrics.entropy(start, cut); ent1 = metrics.entropy(start, cut);
ent2 = metrics.entropy(cut, end); ent2 = metrics.entropy(cut, end);
ig = metrics.informationGain(start, cut, end); ig = metrics.informationGain(start, cut, end);
delta = log2(pow(3, precision_t(k)) - 2) - delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2); (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
precision_t term = 1 / N * (log2(N - 1) + delta); precision_t term = 1 / N * (log2(N - 1) + delta);
return ig > term; return ig > term;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_) indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) {
{
indices_t idx(X_.size()); indices_t idx(X_.size());
iota(idx.begin(), idx.end(), 0); iota(idx.begin(), idx.end(), 0);
for (size_t i = 0; i < X_.size(); i++)
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) { stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
if (X_[i1] == X_[i2]) if (X_[i1] == X_[i2])
return y_[i1] < y_[i2]; return y_[i1] < y_[i2];
@@ -146,16 +174,12 @@ namespace mdlp {
return idx; return idx;
} }
cutPoints_t CPPFImdlp::getCutPoints() cutPoints_t CPPFImdlp::getCutPoints() {
{ sort(cutPoints.begin(), cutPoints.end());
// Remove duplicates and sort return cutPoints;
cutPoints_t output(cutPoints.size()); }
set<precision_t> s;
unsigned size = cutPoints.size(); int CPPFImdlp::get_depth() const {
for (unsigned i = 0; i < size; i++) return depth;
s.insert(cutPoints[i]);
output.assign(s.begin(), s.end());
sort(output.begin(), output.end());
return output;
} }
} }

View File

@@ -1,29 +1,52 @@
#ifndef CPPFIMDLP_H #ifndef CPPFIMDLP_H
#define CPPFIMDLP_H #define CPPFIMDLP_H
#include "typesFImdlp.h" #include "typesFImdlp.h"
#include "Metrics.h" #include "Metrics.h"
#include <limits>
#include <utility> #include <utility>
#include <string> #include <string>
namespace mdlp { namespace mdlp {
class CPPFImdlp { class CPPFImdlp {
protected: protected:
indices_t indices; size_t min_length = 3;
samples_t X; int depth = 0;
labels_t y; int max_depth = numeric_limits<int>::max();
Metrics metrics; float proposed_cuts = 0;
indices_t indices = indices_t();
samples_t X = samples_t();
labels_t y = labels_t();
Metrics metrics = Metrics(y, indices);
cutPoints_t cutPoints; cutPoints_t cutPoints;
size_t num_cut_points = numeric_limits<size_t>::max();
static indices_t sortIndices(samples_t&, labels_t&); static indices_t sortIndices(samples_t&, labels_t&);
void computeCutPoints(size_t, size_t);
void computeCutPoints(size_t, size_t, int);
bool mdlp(size_t, size_t, size_t); bool mdlp(size_t, size_t, size_t);
size_t getCandidate(size_t, size_t); size_t getCandidate(size_t, size_t);
size_t compute_max_num_cut_points() const;
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t); pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
public: public:
CPPFImdlp(); CPPFImdlp();
CPPFImdlp(size_t, int, float);
~CPPFImdlp(); ~CPPFImdlp();
CPPFImdlp& fit(samples_t&, labels_t&);
samples_t getCutPoints(); void fit(samples_t&, labels_t&);
inline string version() { return "1.1.1"; };
cutPoints_t getCutPoints();
int get_depth() const;
static inline string version() { return "1.1.1"; };
}; };
} }
#endif #endif

View File

@@ -1,30 +1,32 @@
#include "Metrics.h" #include "Metrics.h"
#include <set> #include <set>
#include <cmath> #include <cmath>
using namespace std; using namespace std;
namespace mdlp { namespace mdlp {
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t()) Metrics::Metrics(labels_t &y_, indices_t &indices_) : y(y_), indices(indices_),
{ numClasses(computeNumClasses(0, indices.size())) {
} }
int Metrics::computeNumClasses(size_t start, size_t end)
{ int Metrics::computeNumClasses(size_t start, size_t end) {
set<int> nClasses; set<int> nClasses;
for (auto i = start; i < end; ++i) { for (auto i = start; i < end; ++i) {
nClasses.insert(y[indices[i]]); nClasses.insert(y[indices[i]]);
} }
return nClasses.size(); return static_cast<int>(nClasses.size());
} }
void Metrics::setData(labels_t& y_, indices_t& indices_)
{ void Metrics::setData(const labels_t &y_, const indices_t &indices_) {
indices = indices_; indices = indices_;
y = y_; y = y_;
numClasses = computeNumClasses(0, indices.size()); numClasses = computeNumClasses(0, indices.size());
entropyCache.clear(); entropyCache.clear();
igCache.clear(); igCache.clear();
} }
precision_t Metrics::entropy(size_t start, size_t end)
{ precision_t Metrics::entropy(size_t start, size_t end) {
precision_t p, ventropy = 0; precision_t p;
precision_t ventropy = 0;
int nElements = 0; int nElements = 0;
labels_t counts(numClasses + 1, 0); labels_t counts(numClasses + 1, 0);
if (end - start < 2) if (end - start < 2)
@@ -38,26 +40,32 @@ namespace mdlp {
} }
for (auto count: counts) { for (auto count: counts) {
if (count > 0) { if (count > 0) {
p = (precision_t)count / nElements; p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
ventropy -= p * log2(p); ventropy -= p * log2(p);
} }
} }
entropyCache[{start, end}] = ventropy; entropyCache[{start, end}] = ventropy;
return ventropy; return ventropy;
} }
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
{ precision_t Metrics::informationGain(size_t start, size_t cut, size_t end) {
precision_t iGain; precision_t iGain;
precision_t entropyInterval, entropyLeft, entropyRight; precision_t entropyInterval;
int nElementsLeft = cut - start, nElementsRight = end - cut; precision_t entropyLeft;
int nElements = end - start; precision_t entropyRight;
size_t nElementsLeft = cut - start;
size_t nElementsRight = end - cut;
size_t nElements = end - start;
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) { if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
return igCache[make_tuple(start, cut, end)]; return igCache[make_tuple(start, cut, end)];
} }
entropyInterval = entropy(start, end); entropyInterval = entropy(start, end);
entropyLeft = entropy(start, cut); entropyLeft = entropy(start, cut);
entropyRight = entropy(cut, end); entropyRight = entropy(cut, end);
iGain = entropyInterval - ((precision_t)nElementsLeft * entropyLeft + (precision_t)nElementsRight * entropyRight) / nElements; iGain = entropyInterval -
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
static_cast<precision_t>(nElementsRight) * entropyRight) /
static_cast<precision_t>(nElements);
igCache[make_tuple(start, cut, end)] = iGain; igCache[make_tuple(start, cut, end)] = iGain;
return iGain; return iGain;
} }

View File

@@ -1,19 +1,25 @@
#ifndef CCMETRICS_H #ifndef CCMETRICS_H
#define CCMETRICS_H #define CCMETRICS_H
#include "typesFImdlp.h" #include "typesFImdlp.h"
namespace mdlp { namespace mdlp {
class Metrics { class Metrics {
protected: protected:
labels_t &y; labels_t &y;
indices_t &indices; indices_t &indices;
int numClasses; int numClasses;
cacheEnt_t entropyCache; cacheEnt_t entropyCache = cacheEnt_t();
cacheIg_t igCache; cacheIg_t igCache = cacheIg_t();
public: public:
Metrics(labels_t &, indices_t &); Metrics(labels_t &, indices_t &);
void setData(labels_t&, indices_t&);
void setData(const labels_t &, const indices_t &);
int computeNumClasses(size_t, size_t); int computeNumClasses(size_t, size_t);
precision_t entropy(size_t, size_t); precision_t entropy(size_t, size_t);
precision_t informationGain(size_t, size_t, size_t); precision_t informationGain(size_t, size_t, size_t);
}; };
} }

View File

@@ -28,7 +28,8 @@ mkdir build
cd build cd build
cmake .. cmake ..
make make
./sample iris ./sample -f iris -m 2
./sample -h
``` ```
## Test ## Test

21
sample/.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,21 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "lldb puro",
"type": "cppdbg",
// "targetArchitecture": "arm64",
"request": "launch",
"program": "${workspaceRoot}/build/sample",
"args": [
"-f",
"iris"
],
"stopAtEntry": false,
"cwd": "${workspaceRoot}/build/",
"environment": [],
"externalConsole": false,
"MIMode": "lldb"
},
]
}

View File

@@ -1,5 +1,3 @@
cmake_minimum_required(VERSION 3.20)
project(main)
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)

View File

@@ -1,33 +1,98 @@
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <iomanip> #include <iomanip>
#include <chrono>
#include <algorithm>
#include <cstring>
#include <getopt.h>
#include "../CPPFImdlp.h" #include "../CPPFImdlp.h"
#include "../tests/ArffFiles.h" #include "../tests/ArffFiles.h"
using namespace std; using namespace std;
using namespace mdlp; using namespace mdlp;
const string PATH = "../../tests/datasets/";
int main(int argc, char** argv) /* print a description of all supported options */
{ void usage(const char *path) {
ArffFiles file; /* take only the last portion of the path */
string path = "../../tests/datasets/"; const char *basename = strrchr(path, '/');
map<string, bool> datasets = { basename = basename ? basename + 1 : path;
{"mfeat-factors", true},
{"iris", true}, cout << "usage: " << basename << "[OPTION]" << endl;
{"letter", true}, cout << " -h, --help\t\t Print this help and exit." << endl;
{"glass", true}, cout
{"kdd_JapaneseVowels", false}, << " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
{"test", true} << endl;
}; cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) { cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
cout << "Usage: " << argv[0] << " {mfeat-factors, glass, iris, letter, kdd_JapaneseVowels, test}" << endl; cout
return 1; << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any"
<< endl;
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
} }
file.load(path + argv[1] + ".arff", datasets[argv[1]]); tuple<string, string, int, int, float> parse_arguments(int argc, char **argv) {
string file_name;
string path = PATH;
int max_depth = numeric_limits<int>::max();
int min_length = 3;
float max_cutpoints = 0;
const option long_options[] = {
{"help", no_argument, nullptr, 'h'},
{"file", required_argument, nullptr, 'f'},
{"path", required_argument, nullptr, 'p'},
{"max_depth", required_argument, nullptr, 'm'},
{"max_cutpoints", required_argument, nullptr, 'c'},
{"min_length", required_argument, nullptr, 'n'},
{nullptr, no_argument, nullptr, 0}
};
while (true) {
const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, nullptr);
if (c == -1)
break;
switch (c) {
case 'h':
usage(argv[0]);
exit(0);
case 'f':
file_name = string(optarg);
break;
case 'm':
max_depth = stoi(optarg);
break;
case 'n':
min_length = stoi(optarg);
break;
case 'c':
max_cutpoints = stof(optarg);
break;
case 'p':
path = optarg;
if (path.back() != '/')
path += '/';
break;
case '?':
usage(argv[0]);
exit(1);
default:
abort();
}
}
if (file_name.empty()) {
usage(argv[0]);
exit(1);
}
return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
}
void process_file(const string &path, const string &file_name, bool class_last, int max_depth, int min_length,
float max_cutpoints) {
ArffFiles file;
file.load(path + file_name + ".arff", class_last);
auto attributes = file.getAttributes(); auto attributes = file.getAttributes();
int items = file.getSize(); auto items = file.getSize();
cout << "Number of lines: " << items << endl; cout << "Number of lines: " << items << endl;
cout << "Attributes: " << endl; cout << "Attributes: " << endl;
for (auto attribute: attributes) { for (auto attribute: attributes) {
@@ -38,13 +103,14 @@ int main(int argc, char** argv)
cout << "Data: " << endl; cout << "Data: " << endl;
vector<samples_t> &X = file.getX(); vector<samples_t> &X = file.getX();
labels_t &y = file.getY(); labels_t &y = file.getY();
for (int i = 0; i < 50; i++) { for (int i = 0; i < 5; i++) {
for (auto feature: X) { for (auto feature: X) {
cout << fixed << setprecision(1) << feature[i] << " "; cout << fixed << setprecision(1) << feature[i] << " ";
} }
cout << y[i] << endl; cout << y[i] << endl;
} }
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(); auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
auto total = 0;
for (auto i = 0; i < attributes.size(); i++) { for (auto i = 0; i < attributes.size(); i++) {
auto min_max = minmax_element(X[i].begin(), X[i].end()); auto min_max = minmax_element(X[i].begin(), X[i].end());
cout << "Cut points for " << get<0>(attributes[i]) << endl; cout << "Cut points for " << get<0>(attributes[i]) << endl;
@@ -54,6 +120,68 @@ int main(int argc, char** argv)
for (auto item: test.getCutPoints()) { for (auto item: test.getCutPoints()) {
cout << item << endl; cout << item << endl;
} }
total += test.getCutPoints().size();
}
cout << "Total cut points ...: " << total << endl;
cout << "Total feature states: " << total + attributes.size() << endl;
}
void process_all_files(const map<string, bool> &datasets, const string &path, int max_depth, int min_length,
float max_cutpoints) {
cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
<< max_cutpoints << endl << endl;
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
printf("==================== ==== ==== ========\n");
for (const auto &dataset: datasets) {
ArffFiles file;
file.load(path + dataset.first + ".arff", dataset.second);
auto attributes = file.getAttributes();
vector<samples_t> &X = file.getX();
labels_t &y = file.getY();
size_t timing = 0;
int cut_points = 0;
for (auto i = 0; i < attributes.size(); i++) {
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
test.fit(X[i], y);
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
cut_points += test.getCutPoints().size();
}
printf("%-20s %4lu %4d %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
}
}
int main(int argc, char **argv) {
map<string, bool> datasets = {
{"glass", true},
{"iris", true},
{"kdd_JapaneseVowels", false},
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
{"test", true}
};
string file_name;
string path;
int max_depth;
int min_length;
float max_cutpoints;
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
cout << "Invalid file name: " << file_name << endl;
usage(argv[0]);
exit(1);
}
if (file_name == "all")
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
else {
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
cout << "File name ....: " << file_name << endl;
cout << "Max depth ....: " << max_depth << endl;
cout << "Min length ...: " << min_length << endl;
cout << "Max cutpoints : " << max_cutpoints << endl;
} }
return 0; return 0;
} }

View File

@@ -2,55 +2,60 @@
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <map> #include <map>
#include <iostream>
using namespace std; using namespace std;
ArffFiles::ArffFiles() ArffFiles::ArffFiles() = default;
{
} vector<string> ArffFiles::getLines() const {
vector<string> ArffFiles::getLines()
{
return lines; return lines;
} }
unsigned long int ArffFiles::getSize()
{ unsigned long int ArffFiles::getSize() const {
return lines.size(); return lines.size();
} }
vector<pair<string, string>> ArffFiles::getAttributes()
{ vector<pair<string, string>> ArffFiles::getAttributes() const {
return attributes; return attributes;
} }
string ArffFiles::getClassName()
{ string ArffFiles::getClassName() const {
return className; return className;
} }
string ArffFiles::getClassType()
{ string ArffFiles::getClassType() const {
return classType; return classType;
} }
vector<vector<float>>& ArffFiles::getX()
{ vector<vector<float>> &ArffFiles::getX() {
return X; return X;
} }
vector<int>& ArffFiles::getY()
{ vector<int> &ArffFiles::getY() {
return y; return y;
} }
void ArffFiles::load(string fileName, bool classLast)
{ void ArffFiles::load(const string &fileName, bool classLast) {
ifstream file(fileName); ifstream file(fileName);
string keyword, attribute, type; if (!file.is_open()) {
if (file.is_open()) { throw invalid_argument("Unable to open file");
}
string line; string line;
string keyword;
string attribute;
string type;
string type_w;
while (getline(file, line)) { while (getline(file, line)) {
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") { if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue; continue;
} }
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line); stringstream ss(line);
ss >> keyword >> attribute >> type; ss >> keyword >> attribute;
attributes.push_back({ attribute, type }); type = "";
while (ss >> type_w)
type += type_w + " ";
attributes.emplace_back(attribute, trim(type));
continue; continue;
} }
if (line[0] == '@') { if (line[0] == '@') {
@@ -71,18 +76,18 @@ void ArffFiles::load(string fileName, bool classLast)
attributes.erase(attributes.begin()); attributes.erase(attributes.begin());
} }
generateDataset(classLast); generateDataset(classLast);
} else
throw invalid_argument("Unable to open file");
} }
void ArffFiles::generateDataset(bool classLast)
{ void ArffFiles::generateDataset(bool classLast) {
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size())); X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
vector<string> yy = vector<string>(lines.size(), ""); auto yy = vector<string>(lines.size(), "");
int labelIndex = classLast ? attributes.size() : 0; int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
for (int i = 0; i < lines.size(); i++) { for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]); stringstream ss(lines[i]);
string value; string value;
int pos = 0, xIndex = 0; int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) { while (getline(ss, value, ',')) {
if (pos++ == labelIndex) { if (pos++ == labelIndex) {
yy[i] = value; yy[i] = value;
@@ -93,20 +98,20 @@ void ArffFiles::generateDataset(bool classLast)
} }
y = factorize(yy); y = factorize(yy);
} }
string ArffFiles::trim(const string& source)
{ string ArffFiles::trim(const string &source) {
string s(source); string s(source);
s.erase(0, s.find_first_not_of(" \n\r\t")); s.erase(0, s.find_first_not_of(" \n\r\t"));
s.erase(s.find_last_not_of(" \n\r\t") + 1); s.erase(s.find_last_not_of(" \n\r\t") + 1);
return s; return s;
} }
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{ vector<int> ArffFiles::factorize(const vector<string> &labels_t) {
vector<int> yy; vector<int> yy;
yy.reserve(labels_t.size()); yy.reserve(labels_t.size());
map<string, int> labelMap; map<string, int> labelMap;
int i = 0; int i = 0;
for (string label : labels_t) { for (const string &label: labels_t) {
if (labelMap.find(label) == labelMap.end()) { if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++; labelMap[label] = i++;
} }

View File

@@ -1,27 +1,44 @@
#ifndef ARFFFILES_H #ifndef ARFFFILES_H
#define ARFFFILES_H #define ARFFFILES_H
#include <string> #include <string>
#include <vector> #include <vector>
using namespace std; using namespace std;
class ArffFiles { class ArffFiles {
private: private:
vector<string> lines; vector<string> lines;
vector<pair<string, string>> attributes; vector<pair<string, string>> attributes;
string className, classType; string className;
string classType;
vector<vector<float>> X; vector<vector<float>> X;
vector<int> y; vector<int> y;
void generateDataset(bool); void generateDataset(bool);
public: public:
ArffFiles(); ArffFiles();
void load(string, bool = true);
vector<string> getLines(); void load(const string &, bool = true);
unsigned long int getSize();
string getClassName(); vector<string> getLines() const;
string getClassType();
string trim(const string&); unsigned long int getSize() const;
string getClassName() const;
string getClassType() const;
static string trim(const string &);
vector<vector<float>> &getX(); vector<vector<float>> &getX();
vector<int> &getY(); vector<int> &getY();
vector<pair<string, string>> getAttributes();
vector<int> factorize(const vector<string>& labels_t); vector<pair<string, string>> getAttributes() const;
static vector<int> factorize(const vector<string> &labels_t);
}; };
#endif #endif

View File

@@ -1,12 +1,9 @@
cmake_minimum_required(VERSION 3.14) set(CMAKE_CXX_STANDARD 11)
project(FImdlp)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 14)
include(FetchContent) include(FetchContent)
include_directories(${GTEST_INCLUDE_DIRS}) include_directories(${GTEST_INCLUDE_DIRS})
FetchContent_Declare( FetchContent_Declare(
googletest googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
@@ -18,7 +15,7 @@ FetchContent_MakeAvailable(googletest)
enable_testing() enable_testing()
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp) add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp) add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
target_link_libraries(Metrics_unittest GTest::gtest_main) target_link_libraries(Metrics_unittest GTest::gtest_main)
target_link_libraries(FImdlp_unittest GTest::gtest_main) target_link_libraries(FImdlp_unittest GTest::gtest_main)
target_compile_options(Metrics_unittest PRIVATE --coverage) target_compile_options(Metrics_unittest PRIVATE --coverage)

View File

@@ -1,22 +1,47 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "../Metrics.h" #include "../Metrics.h"
#include "../CPPFImdlp.h" #include "../CPPFImdlp.h"
#include "ArffFiles.h" #include <fstream>
#include <iostream> #include <iostream>
#include "ArffFiles.h"
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
try { \
stmt; \
} catch (const etype& ex) { \
EXPECT_EQ(whatstring, std::string(ex.what())); \
throw; \
} \
, etype)
namespace mdlp { namespace mdlp {
class TestFImdlp : public CPPFImdlp, public testing::Test { class TestFImdlp : public CPPFImdlp, public testing::Test {
public: public:
precision_t precision = 0.000001; precision_t precision = 0.000001f;
TestFImdlp() : CPPFImdlp() {} TestFImdlp() : CPPFImdlp() {}
void SetUp()
{ string data_path;
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
void SetUp() override {
X = {4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
6.0f, 5.1f, 5.9f};
y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
fit(X, y); fit(X, y);
data_path = set_data_path();
} }
void checkSortedVector()
{ static string set_data_path() {
string path = "../datasets/";
ifstream file(path + "iris.arff");
if (file.is_open()) {
file.close();
return path;
}
return "../../tests/datasets/";
}
void checkSortedVector() {
indices_t testSortedIndices = sortIndices(X, y); indices_t testSortedIndices = sortIndices(X, y);
precision_t prev = X[testSortedIndices[0]]; precision_t prev = X[testSortedIndices[0]];
for (unsigned long i = 0; i < X.size(); ++i) { for (unsigned long i = 0; i < X.size(); ++i) {
@@ -25,25 +50,16 @@ namespace mdlp {
prev = X[testSortedIndices[i]]; prev = X[testSortedIndices[i]];
} }
} }
void checkCutPoints(cutPoints_t& expected)
{ void checkCutPoints(cutPoints_t &computed, cutPoints_t &expected) const {
int expectedSize = expected.size(); EXPECT_EQ(computed.size(), expected.size());
EXPECT_EQ(cutPoints.size(), expectedSize); for (unsigned long i = 0; i < computed.size(); i++) {
for (unsigned long i = 0; i < cutPoints.size(); i++) { cout << "(" << computed[i] << ", " << expected[i] << ") ";
EXPECT_NEAR(cutPoints[i], expected[i], precision); EXPECT_NEAR(computed[i], expected[i], precision);
} }
} }
template<typename T, typename A>
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed) bool test_result(const samples_t &X_, size_t cut, float midPoint, size_t limit, const string &title) {
{
EXPECT_EQ(expected.size(), computed.size());
ASSERT_EQ(expected.size(), computed.size());
for (auto i = 0; i < expected.size(); i++) {
EXPECT_NEAR(expected[i], computed[i], precision);
}
}
bool test_result(samples_t& X_, size_t cut, float midPoint, size_t limit, string title)
{
pair<precision_t, size_t> result; pair<precision_t, size_t> result;
labels_t y_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; labels_t y_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
X = X_; X = X_;
@@ -55,39 +71,74 @@ namespace mdlp {
EXPECT_EQ(result.second, limit); EXPECT_EQ(result.second, limit);
return true; return true;
} }
void test_dataset(CPPFImdlp &test, const string &filename, vector<cutPoints_t> &expected,
vector<int> &depths) const {
ArffFiles file;
file.load(data_path + filename + ".arff", true);
vector<samples_t> &X = file.getX();
labels_t &y = file.getY();
auto attributes = file.getAttributes();
for (auto feature = 0; feature < attributes.size(); feature++) {
test.fit(X[feature], y);
EXPECT_EQ(test.get_depth(), depths[feature]);
auto computed = test.getCutPoints();
cout << "Feature " << feature << ": ";
checkCutPoints(computed, expected[feature]);
cout << endl;
}
}
}; };
TEST_F(TestFImdlp, FitErrorEmptyDataset)
{ TEST_F(TestFImdlp, FitErrorEmptyDataset) {
X = samples_t(); X = samples_t();
y = labels_t(); y = labels_t();
EXPECT_THROW(fit(X, y), std::invalid_argument); EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have at least one element");
} }
TEST_F(TestFImdlp, FitErrorDifferentSize)
{ TEST_F(TestFImdlp, FitErrorDifferentSize) {
X = {1, 2, 3}; X = {1, 2, 3};
y = {1, 2}; y = {1, 2};
EXPECT_THROW(fit(X, y), std::invalid_argument); EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size");
} }
TEST_F(TestFImdlp, SortIndices)
{ TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth) {
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; auto testLength = CPPFImdlp(2, 10, 0);
auto testDepth = CPPFImdlp(3, 0, 0);
X = {1, 2, 3};
y = {1, 2, 3};
EXPECT_THROW_WITH_MESSAGE(testLength.fit(X, y), invalid_argument, "min_length must be greater than 2");
EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
}
TEST_F(TestFImdlp, FitErrorMaxCutPoints) {
auto testmin = CPPFImdlp(2, 10, -1);
auto testmax = CPPFImdlp(3, 0, 200);
X = {1, 2, 3};
y = {1, 2, 3};
EXPECT_THROW_WITH_MESSAGE(testmin.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
EXPECT_THROW_WITH_MESSAGE(testmax.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
}
TEST_F(TestFImdlp, SortIndices) {
X = {5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f};
y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7}; indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7};
checkSortedVector(); checkSortedVector();
X = { 5.77, 5.88, 5.99 }; X = {5.77f, 5.88f, 5.99f};
y = {1, 2, 1}; y = {1, 2, 1};
indices = {0, 1, 2}; indices = {0, 1, 2};
checkSortedVector(); checkSortedVector();
X = { 5.33, 5.22, 5.11 }; X = {5.33f, 5.22f, 5.11f};
y = {1, 2, 1}; y = {1, 2, 1};
indices = {2, 1, 0}; indices = {2, 1, 0};
checkSortedVector(); checkSortedVector();
X = { 5.33, 5.22, 5.33 }; X = {5.33f, 5.22f, 5.33f};
y = {2, 2, 1}; y = {2, 2, 1};
indices = {1, 2, 0}; indices = {1, 2, 0};
} }
TEST_F(TestFImdlp, TestShortDatasets)
{ TEST_F(TestFImdlp, TestShortDatasets) {
vector<precision_t> computed; vector<precision_t> computed;
X = {1}; X = {1};
y = {1}; y = {1};
@@ -111,71 +162,137 @@ namespace mdlp {
EXPECT_EQ(computed.size(), 1); EXPECT_EQ(computed.size(), 1);
EXPECT_NEAR(computed[0], 1.5, precision); EXPECT_NEAR(computed[0], 1.5, precision);
} }
TEST_F(TestFImdlp, TestArtificialDataset)
{ TEST_F(TestFImdlp, TestArtificialDataset) {
fit(X, y); fit(X, y);
computeCutPoints(0, 20); cutPoints_t expected = {5.05f};
cutPoints_t expected = { 5.05 };
vector<precision_t> computed = getCutPoints(); vector<precision_t> computed = getCutPoints();
computed = getCutPoints();
int expectedSize = expected.size();
EXPECT_EQ(computed.size(), expected.size()); EXPECT_EQ(computed.size(), expected.size());
for (unsigned long i = 0; i < computed.size(); i++) { for (unsigned long i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[i], precision); EXPECT_NEAR(computed[i], expected[i], precision);
} }
} }
TEST_F(TestFImdlp, TestIris)
{
ArffFiles file;
string path = "../datasets/";
file.load(path + "iris.arff", true); TEST_F(TestFImdlp, TestIris) {
int items = file.getSize();
vector<samples_t>& X = file.getX();
vector<cutPoints_t> expected = { vector<cutPoints_t> expected = {
{ 5.4499998092651367, 5.75 }, {5.45f, 5.75f},
{ 2.75, 2.85, 2.95, 3.05, 3.35 }, {2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
{ 2.4500000476837158, 4.75, 5.0500001907348633 }, {2.45f, 4.75f, 5.05f},
{ 0.80000001192092896, 1.75 } {0.8f, 1.75f}
}; };
labels_t& y = file.getY(); vector<int> depths = {3, 5, 4, 3};
auto attributes = file.getAttributes(); auto test = CPPFImdlp();
for (auto feature = 0; feature < attributes.size(); feature++) { test_dataset(test, "iris", expected, depths);
fit(X[feature], y);
vector<precision_t> computed = getCutPoints();
EXPECT_EQ(computed.size(), expected[feature].size());
for (auto i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[feature][i], precision);
} }
}
} TEST_F(TestFImdlp, ComputeCutPointsGCase) {
TEST_F(TestFImdlp, ComputeCutPointsGCase)
{
cutPoints_t expected; cutPoints_t expected;
expected = {1.5}; expected = {1.5};
samples_t X_ = {0, 1, 2, 2, 2}; samples_t X_ = {0, 1, 2, 2, 2};
labels_t y_ = {1, 1, 1, 2, 2}; labels_t y_ = {1, 1, 1, 2, 2};
fit(X_, y_); fit(X_, y_);
checkCutPoints(expected); auto computed = getCutPoints();
checkCutPoints(computed, expected);
} }
TEST_F(TestFImdlp, ValueCutPoint)
{ TEST_F(TestFImdlp, ValueCutPoint) {
// Case titles as stated in the doc // Case titles as stated in the doc
samples_t X1a{ 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 }; samples_t X1a{3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f};
test_result(X1a, 6, 7.3 / 2, 6, "1a"); test_result(X1a, 6, 7.3f / 2, 6, "1a");
samples_t X2a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; samples_t X2a = {3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
test_result(X2a, 6, 7.1 / 2, 4, "2a"); test_result(X2a, 6, 7.1f / 2, 4, "2a");
samples_t X2b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; samples_t X2b = {3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
test_result(X2b, 6, 7.5 / 2, 7, "2b"); test_result(X2b, 6, 7.5f / 2, 7, "2b");
samples_t X3a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; samples_t X3a = {3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
test_result(X3a, 4, 7.1 / 2, 4, "3a"); test_result(X3a, 4, 7.1f / 2, 4, "3a");
samples_t X3b = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 }; samples_t X3b = {3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f};
test_result(X3b, 4, 7.1 / 2, 4, "3b"); test_result(X3b, 4, 7.1f / 2, 4, "3b");
samples_t X4a = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.9, 4.0 }; samples_t X4a = {3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f};
test_result(X4a, 4, 6.9 / 2, 2, "4a"); test_result(X4a, 4, 6.9f / 2, 2, "4a");
samples_t X4b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; samples_t X4b = {3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
test_result(X4b, 4, 7.5 / 2, 7, "4b"); test_result(X4b, 4, 7.5f / 2, 7, "4b");
samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 }; samples_t X4c = {3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f};
test_result(X4c, 4, 6.9 / 2, 2, "4c"); test_result(X4c, 4, 6.9f / 2, 2, "4c");
}
TEST_F(TestFImdlp, MaxDepth) {
// Set max_depth to 1
auto test = CPPFImdlp(3, 1, 0);
vector<cutPoints_t> expected = {
{5.45f},
{3.35f},
{2.45f},
{0.8f}
};
vector<int> depths = {1, 1, 1, 1};
test_dataset(test, "iris", expected, depths);
}
TEST_F(TestFImdlp, MinLength) {
auto test = CPPFImdlp(75, 100, 0);
// Set min_length to 75
vector<cutPoints_t> expected = {
{5.45f, 5.75f},
{2.85f, 3.35f},
{2.45f, 4.75f},
{0.8f, 1.75f}
};
vector<int> depths = {3, 2, 2, 2};
test_dataset(test, "iris", expected, depths);
}
TEST_F(TestFImdlp, MinLengthMaxDepth) {
// Set min_length to 75
auto test = CPPFImdlp(75, 2, 0);
vector<cutPoints_t> expected = {
{5.45f, 5.75f},
{2.85f, 3.35f},
{2.45f, 4.75f},
{0.8f, 1.75f}
};
vector<int> depths = {2, 2, 2, 2};
test_dataset(test, "iris", expected, depths);
}
TEST_F(TestFImdlp, MaxCutPointsInteger) {
// Set min_length to 75
auto test = CPPFImdlp(75, 2, 1);
vector<cutPoints_t> expected = {
{5.45f},
{3.35f},
{2.45f},
{0.8f}
};
vector<int> depths = {1, 1, 1, 1};
test_dataset(test, "iris", expected, depths);
}
TEST_F(TestFImdlp, MaxCutPointsFloat) {
// Set min_length to 75
auto test = CPPFImdlp(75, 2, 0.2f);
vector<cutPoints_t> expected = {
{5.45f, 5.75f},
{2.85f, 3.35f},
{2.45f, 4.75f},
{0.8f, 1.75f}
};
vector<int> depths = {2, 2, 2, 2};
test_dataset(test, "iris", expected, depths);
}
TEST_F(TestFImdlp, ProposedCuts) {
vector<pair<float, size_t>> proposed_list = {{0.1f, 2},
{0.5f, 10},
{0.07f, 1},
{1.0f, 1},
{2.0f, 2}};
size_t expected;
size_t computed;
for (auto proposed_item: proposed_list) {
tie(proposed_cuts, expected) = proposed_item;
computed = compute_max_num_cut_points();
ASSERT_EQ(expected, computed);
}
} }
} }

View File

@@ -1,43 +1,40 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "../Metrics.h" #include "../Metrics.h"
namespace mdlp { namespace mdlp {
class TestMetrics : public Metrics, public testing::Test { class TestMetrics : public Metrics, public testing::Test {
public: public:
labels_t y; labels_t y_ = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
samples_t X; indices_t indices_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
indices_t indices; precision_t precision = 0.000001f;
precision_t precision = 0.000001;
TestMetrics(): Metrics(y, indices) {} TestMetrics() : Metrics(y_, indices_) {};
void SetUp()
{ void SetUp() override {
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; setData(y_, indices_);
indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
setData(y, indices);
} }
}; };
TEST_F(TestMetrics, NumClasses)
{ TEST_F(TestMetrics, NumClasses) {
y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
EXPECT_EQ(1, computeNumClasses(4, 8)); EXPECT_EQ(1, computeNumClasses(4, 8));
EXPECT_EQ(2, computeNumClasses(0, 10)); EXPECT_EQ(2, computeNumClasses(0, 10));
EXPECT_EQ(2, computeNumClasses(8, 10)); EXPECT_EQ(2, computeNumClasses(8, 10));
} }
TEST_F(TestMetrics, Entropy)
{ TEST_F(TestMetrics, Entropy) {
EXPECT_EQ(1, entropy(0, 10)); EXPECT_EQ(1, entropy(0, 10));
EXPECT_EQ(0, entropy(0, 5)); EXPECT_EQ(0, entropy(0, 5));
y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
setData(y, indices); setData(y, indices);
ASSERT_NEAR(0.468996, entropy(0, 10), precision); ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
} }
TEST_F(TestMetrics, InformationGain)
{ TEST_F(TestMetrics, InformationGain) {
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1}; y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
setData(y, indices); setData(y, indices);
ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision); ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
} }
} }

View File

@@ -1,4 +0,0 @@
rm -fr lcoverage/*
lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
genhtml lcoverage/main_coverage.info --output-directory lcoverage
open lcoverage/index.html

View File

@@ -0,0 +1,399 @@
% 1. Title: BUPA liver disorders
%
% 2. Source information:
% -- Creators: BUPA Medical Research Ltd.
% -- Donor: Richard S. Forsyth
% 8 Grosvenor Avenue
% Mapperley Park
% Nottingham NG3 5DX
% 0602-621676
% -- Date: 5/15/1990
%
% 3. Past usage:
% -- None known other than what is shown in the PC/BEAGLE User's Guide
% (written by Richard S. Forsyth).
%
% 4. Relevant information:
% -- The first 5 variables are all blood tests which are thought
% to be sensitive to liver disorders that might arise from
% excessive alcohol consumption. Each line in the bupa.data file
% constitutes the record of a single male individual.
% -- It appears that drinks>5 is some sort of a selector on this database.
% See the PC/BEAGLE User's Guide for more information.
%
% 5. Number of instances: 345
%
% 6. Number of attributes: 7 overall
%
% 7. Attribute information:
% 1. mcv mean corpuscular volume
% 2. alkphos alkaline phosphotase
% 3. sgpt alamine aminotransferase
% 4. sgot aspartate aminotransferase
% 5. gammagt gamma-glutamyl transpeptidase
% 6. drinks number of half-pint equivalents of alcoholic beverages
% drunk per day
% 7. selector field used to split data into two sets
%
% 8. Missing values: none%
% Information about the dataset
% CLASSTYPE: nominal
% CLASSINDEX: last
%
@relation liver-disorders
@attribute mcv INTEGER
@attribute alkphos INTEGER
@attribute sgpt INTEGER
@attribute sgot INTEGER
@attribute gammagt INTEGER
@attribute drinks REAL
@attribute selector {1,2}
@data
85,92,45,27,31,0.0,1
85,64,59,32,23,0.0,2
86,54,33,16,54,0.0,2
91,78,34,24,36,0.0,2
87,70,12,28,10,0.0,2
98,55,13,17,17,0.0,2
88,62,20,17,9,0.5,1
88,67,21,11,11,0.5,1
92,54,22,20,7,0.5,1
90,60,25,19,5,0.5,1
89,52,13,24,15,0.5,1
82,62,17,17,15,0.5,1
90,64,61,32,13,0.5,1
86,77,25,19,18,0.5,1
96,67,29,20,11,0.5,1
91,78,20,31,18,0.5,1
89,67,23,16,10,0.5,1
89,79,17,17,16,0.5,1
91,107,20,20,56,0.5,1
94,116,11,33,11,0.5,1
92,59,35,13,19,0.5,1
93,23,35,20,20,0.5,1
90,60,23,27,5,0.5,1
96,68,18,19,19,0.5,1
84,80,47,33,97,0.5,1
92,70,24,13,26,0.5,1
90,47,28,15,18,0.5,1
88,66,20,21,10,0.5,1
91,102,17,13,19,0.5,1
87,41,31,19,16,0.5,1
86,79,28,16,17,0.5,1
91,57,31,23,42,0.5,1
93,77,32,18,29,0.5,1
88,96,28,21,40,0.5,1
94,65,22,18,11,0.5,1
91,72,155,68,82,0.5,2
85,54,47,33,22,0.5,2
79,39,14,19,9,0.5,2
85,85,25,26,30,0.5,2
89,63,24,20,38,0.5,2
84,92,68,37,44,0.5,2
89,68,26,39,42,0.5,2
89,101,18,25,13,0.5,2
86,84,18,14,16,0.5,2
85,65,25,14,18,0.5,2
88,61,19,21,13,0.5,2
92,56,14,16,10,0.5,2
95,50,29,25,50,0.5,2
91,75,24,22,11,0.5,2
83,40,29,25,38,0.5,2
89,74,19,23,16,0.5,2
85,64,24,22,11,0.5,2
92,57,64,36,90,0.5,2
94,48,11,23,43,0.5,2
87,52,21,19,30,0.5,2
85,65,23,29,15,0.5,2
84,82,21,21,19,0.5,2
88,49,20,22,19,0.5,2
96,67,26,26,36,0.5,2
90,63,24,24,24,0.5,2
90,45,33,34,27,0.5,2
90,72,14,15,18,0.5,2
91,55,4,8,13,0.5,2
91,52,15,22,11,0.5,2
87,71,32,19,27,1.0,1
89,77,26,20,19,1.0,1
89,67,5,17,14,1.0,2
85,51,26,24,23,1.0,2
103,75,19,30,13,1.0,2
90,63,16,21,14,1.0,2
90,63,29,23,57,2.0,1
90,67,35,19,35,2.0,1
87,66,27,22,9,2.0,1
90,73,34,21,22,2.0,1
86,54,20,21,16,2.0,1
90,80,19,14,42,2.0,1
87,90,43,28,156,2.0,2
96,72,28,19,30,2.0,2
91,55,9,25,16,2.0,2
95,78,27,25,30,2.0,2
92,101,34,30,64,2.0,2
89,51,41,22,48,2.0,2
91,99,42,33,16,2.0,2
94,58,21,18,26,2.0,2
92,60,30,27,297,2.0,2
94,58,21,18,26,2.0,2
88,47,33,26,29,2.0,2
92,65,17,25,9,2.0,2
92,79,22,20,11,3.0,1
84,83,20,25,7,3.0,1
88,68,27,21,26,3.0,1
86,48,20,20,6,3.0,1
99,69,45,32,30,3.0,1
88,66,23,12,15,3.0,1
89,62,42,30,20,3.0,1
90,51,23,17,27,3.0,1
81,61,32,37,53,3.0,2
89,89,23,18,104,3.0,2
89,65,26,18,36,3.0,2
92,75,26,26,24,3.0,2
85,59,25,20,25,3.0,2
92,61,18,13,81,3.0,2
89,63,22,27,10,4.0,1
90,84,18,23,13,4.0,1
88,95,25,19,14,4.0,1
89,35,27,29,17,4.0,1
91,80,37,23,27,4.0,1
91,109,33,15,18,4.0,1
91,65,17,5,7,4.0,1
88,107,29,20,50,4.0,2
87,76,22,55,9,4.0,2
87,86,28,23,21,4.0,2
87,42,26,23,17,4.0,2
88,80,24,25,17,4.0,2
90,96,34,49,169,4.0,2
86,67,11,15,8,4.0,2
92,40,19,20,21,4.0,2
85,60,17,21,14,4.0,2
89,90,15,17,25,4.0,2
91,57,15,16,16,4.0,2
96,55,48,39,42,4.0,2
79,101,17,27,23,4.0,2
90,134,14,20,14,4.0,2
89,76,14,21,24,4.0,2
88,93,29,27,31,4.0,2
90,67,10,16,16,4.0,2
92,73,24,21,48,4.0,2
91,55,28,28,82,4.0,2
83,45,19,21,13,4.0,2
90,74,19,14,22,4.0,2
92,66,21,16,33,5.0,1
93,63,26,18,18,5.0,1
86,78,47,39,107,5.0,2
97,44,113,45,150,5.0,2
87,59,15,19,12,5.0,2
86,44,21,11,15,5.0,2
87,64,16,20,24,5.0,2
92,57,21,23,22,5.0,2
90,70,25,23,112,5.0,2
99,59,17,19,11,5.0,2
92,80,10,26,20,6.0,1
95,60,26,22,28,6.0,1
91,63,25,26,15,6.0,1
92,62,37,21,36,6.0,1
95,50,13,14,15,6.0,1
90,76,37,19,50,6.0,1
96,70,70,26,36,6.0,1
95,62,64,42,76,6.0,1
92,62,20,23,20,6.0,1
91,63,25,26,15,6.0,1
82,56,67,38,92,6.0,2
92,82,27,24,37,6.0,2
90,63,12,26,21,6.0,2
88,37,9,15,16,6.0,2
100,60,29,23,76,6.0,2
98,43,35,23,69,6.0,2
91,74,87,50,67,6.0,2
92,87,57,25,44,6.0,2
93,99,36,34,48,6.0,2
90,72,17,19,19,6.0,2
97,93,21,20,68,6.0,2
93,50,18,25,17,6.0,2
90,57,20,26,33,6.0,2
92,76,31,28,41,6.0,2
88,55,19,17,14,6.0,2
89,63,24,29,29,6.0,2
92,79,70,32,84,7.0,1
92,93,58,35,120,7.0,1
93,84,58,47,62,7.0,2
97,71,29,22,52,8.0,1
84,99,33,19,26,8.0,1
96,44,42,23,73,8.0,1
90,62,22,21,21,8.0,1
92,94,18,17,6,8.0,1
90,67,77,39,114,8.0,1
97,71,29,22,52,8.0,1
91,69,25,25,66,8.0,2
93,59,17,20,14,8.0,2
92,95,85,48,200,8.0,2
90,50,26,22,53,8.0,2
91,62,59,47,60,8.0,2
92,93,22,28,123,9.0,1
92,77,86,41,31,10.0,1
86,66,22,24,26,10.0,2
98,57,31,34,73,10.0,2
95,80,50,64,55,10.0,2
92,108,53,33,94,12.0,2
97,92,22,28,49,12.0,2
93,77,39,37,108,16.0,1
94,83,81,34,201,20.0,1
87,75,25,21,14,0.0,1
88,56,23,18,12,0.0,1
84,97,41,20,32,0.0,2
94,91,27,20,15,0.5,1
97,62,17,13,5,0.5,1
92,85,25,20,12,0.5,1
82,48,27,15,12,0.5,1
88,74,31,25,15,0.5,1
95,77,30,14,21,0.5,1
88,94,26,18,8,0.5,1
91,70,19,19,22,0.5,1
83,54,27,15,12,0.5,1
91,105,40,26,56,0.5,1
86,79,37,28,14,0.5,1
91,96,35,22,135,0.5,1
89,82,23,14,35,0.5,1
90,73,24,23,11,0.5,1
90,87,19,25,19,0.5,1
89,82,33,32,18,0.5,1
85,79,17,8,9,0.5,1
85,119,30,26,17,0.5,1
78,69,24,18,31,0.5,1
88,107,34,21,27,0.5,1
89,115,17,27,7,0.5,1
92,67,23,15,12,0.5,1
89,101,27,34,14,0.5,1
91,84,11,12,10,0.5,1
94,101,41,20,53,0.5,2
88,46,29,22,18,0.5,2
88,122,35,29,42,0.5,2
84,88,28,25,35,0.5,2
90,79,18,15,24,0.5,2
87,69,22,26,11,0.5,2
65,63,19,20,14,0.5,2
90,64,12,17,14,0.5,2
85,58,18,24,16,0.5,2
88,81,41,27,36,0.5,2
86,78,52,29,62,0.5,2
82,74,38,28,48,0.5,2
86,58,36,27,59,0.5,2
94,56,30,18,27,0.5,2
87,57,30,30,22,0.5,2
98,74,148,75,159,0.5,2
94,75,20,25,38,0.5,2
83,68,17,20,71,0.5,2
93,56,25,21,33,0.5,2
101,65,18,21,22,0.5,2
92,65,25,20,31,0.5,2
92,58,14,16,13,0.5,2
86,58,16,23,23,0.5,2
85,62,15,13,22,0.5,2
86,57,13,20,13,0.5,2
86,54,26,30,13,0.5,2
81,41,33,27,34,1.0,1
91,67,32,26,13,1.0,1
91,80,21,19,14,1.0,1
92,60,23,15,19,1.0,1
91,60,32,14,8,1.0,1
93,65,28,22,10,1.0,1
90,63,45,24,85,1.0,2
87,92,21,22,37,1.0,2
83,78,31,19,115,1.0,2
95,62,24,23,14,1.0,2
93,59,41,30,48,1.0,2
84,82,43,32,38,2.0,1
87,71,33,20,22,2.0,1
86,44,24,15,18,2.0,1
86,66,28,24,21,2.0,1
88,58,31,17,17,2.0,1
90,61,28,29,31,2.0,1
88,69,70,24,64,2.0,1
93,87,18,17,26,2.0,1
98,58,33,21,28,2.0,1
91,44,18,18,23,2.0,2
87,75,37,19,70,2.0,2
94,91,30,26,25,2.0,2
88,85,14,15,10,2.0,2
89,109,26,25,27,2.0,2
87,59,37,27,34,2.0,2
93,58,20,23,18,2.0,2
88,57,9,15,16,2.0,2
94,65,38,27,17,3.0,1
91,71,12,22,11,3.0,1
90,55,20,20,16,3.0,1
91,64,21,17,26,3.0,2
88,47,35,26,33,3.0,2
82,72,31,20,84,3.0,2
85,58,83,49,51,3.0,2
91,54,25,22,35,4.0,1
98,50,27,25,53,4.0,2
86,62,29,21,26,4.0,2
89,48,32,22,14,4.0,2
82,68,20,22,9,4.0,2
83,70,17,19,23,4.0,2
96,70,21,26,21,4.0,2
94,117,77,56,52,4.0,2
93,45,11,14,21,4.0,2
93,49,27,21,29,4.0,2
84,73,46,32,39,4.0,2
91,63,17,17,46,4.0,2
90,57,31,18,37,4.0,2
87,45,19,13,16,4.0,2
91,68,14,20,19,4.0,2
86,55,29,35,108,4.0,2
91,86,52,47,52,4.0,2
88,46,15,33,55,4.0,2
85,52,22,23,34,4.0,2
89,72,33,27,55,4.0,2
95,59,23,18,19,4.0,2
94,43,154,82,121,4.0,2
96,56,38,26,23,5.0,2
90,52,10,17,12,5.0,2
94,45,20,16,12,5.0,2
99,42,14,21,49,5.0,2
93,102,47,23,37,5.0,2
94,71,25,26,31,5.0,2
92,73,33,34,115,5.0,2
87,54,41,29,23,6.0,1
92,67,15,14,14,6.0,1
98,101,31,26,32,6.0,1
92,53,51,33,92,6.0,1
97,94,43,43,82,6.0,1
93,43,11,16,54,6.0,1
93,68,24,18,19,6.0,1
95,36,38,19,15,6.0,1
99,86,58,42,203,6.0,1
98,66,103,57,114,6.0,1
92,80,10,26,20,6.0,1
96,74,27,25,43,6.0,2
95,93,21,27,47,6.0,2
86,109,16,22,28,6.0,2
91,46,30,24,39,7.0,2
102,82,34,78,203,7.0,2
85,50,12,18,14,7.0,2
91,57,33,23,12,8.0,1
91,52,76,32,24,8.0,1
93,70,46,30,33,8.0,1
87,55,36,19,25,8.0,1
98,123,28,24,31,8.0,1
82,55,18,23,44,8.0,2
95,73,20,25,225,8.0,2
97,80,17,20,53,8.0,2
100,83,25,24,28,8.0,2
88,91,56,35,126,9.0,2
91,138,45,21,48,10.0,1
92,41,37,22,37,10.0,1
86,123,20,25,23,10.0,2
91,93,35,34,37,10.0,2
87,87,15,23,11,10.0,2
87,56,52,43,55,10.0,2
99,75,26,24,41,12.0,1
96,69,53,43,203,12.0,2
98,77,55,35,89,15.0,1
91,68,27,26,14,16.0,1
98,99,57,45,65,20.0,1

View File

@@ -1,12 +1,20 @@
if [ -d build ] ; then
rm -fr build
fi
if [ -d gcovr-report ] ; then
rm -fr gcovr-report
fi
cmake -S . -B build -Wno-dev cmake -S . -B build -Wno-dev
if test $? -ne 0; then
echo "Error in creating build commands."
exit 1
fi
cmake --build build cmake --build build
if test $? -ne 0; then
echo "Error in build command."
exit 1
fi
cd build cd build
ctest --output-on-failure ctest --output-on-failure
cd ..
if [ ! -d gcovr-report ] ; then
mkdir gcovr-report
fi
rm -fr gcovr-report/* 2>/dev/null
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
#lcov --list lcoverage/main_coverage.info
cd ..
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml