mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-16 07:55:58 +00:00
refactor system types in library
Add new test taken from join_fit in FImdlp Python. Update instructions in README.
This commit is contained in:
@@ -128,8 +128,8 @@ namespace mdlp {
|
|||||||
// Cutpoints are always on boundaries (definition 2)
|
// Cutpoints are always on boundaries (definition 2)
|
||||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||||
continue;
|
continue;
|
||||||
entropy_left = precision_t(idx - start) / static_cast<float>(elements) * metrics.entropy(start, idx);
|
entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
|
||||||
entropy_right = precision_t(end - idx) / static_cast<float>(elements) * metrics.entropy(idx, end);
|
entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
|
||||||
if (entropy_left + entropy_right < minEntropy) {
|
if (entropy_left + entropy_right < minEntropy) {
|
||||||
minEntropy = entropy_left + entropy_right;
|
minEntropy = entropy_left + entropy_right;
|
||||||
candidate = idx;
|
candidate = idx;
|
||||||
@@ -155,8 +155,8 @@ namespace mdlp {
|
|||||||
ent1 = metrics.entropy(start, cut);
|
ent1 = metrics.entropy(start, cut);
|
||||||
ent2 = metrics.entropy(cut, end);
|
ent2 = metrics.entropy(cut, end);
|
||||||
ig = metrics.informationGain(start, cut, end);
|
ig = metrics.informationGain(start, cut, end);
|
||||||
delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
|
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
|
||||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||||
return ig > term;
|
return ig > term;
|
||||||
}
|
}
|
||||||
|
@@ -24,9 +24,8 @@ To run the sample, just execute the following commands:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd sample
|
cd sample
|
||||||
mkdir build
|
cmake -B build
|
||||||
cd build
|
cd build
|
||||||
cmake ..
|
|
||||||
make
|
make
|
||||||
./sample -f iris -m 2
|
./sample -f iris -m 2
|
||||||
./sample -h
|
./sample -h
|
||||||
@@ -34,7 +33,7 @@ make
|
|||||||
|
|
||||||
## Test
|
## Test
|
||||||
|
|
||||||
To run the tests, execute the following commands:
|
To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd tests
|
cd tests
|
||||||
|
@@ -27,7 +27,7 @@ string ArffFiles::getClassType() const {
|
|||||||
return classType;
|
return classType;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<vector<float>> &ArffFiles::getX() {
|
vector<mdlp::samples_t> &ArffFiles::getX() {
|
||||||
return X;
|
return X;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,7 +80,7 @@ void ArffFiles::load(const string &fileName, bool classLast) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ArffFiles::generateDataset(bool classLast) {
|
void ArffFiles::generateDataset(bool classLast) {
|
||||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
X = vector<mdlp::samples_t>(attributes.size(), mdlp::samples_t(lines.size()));
|
||||||
auto yy = vector<string>(lines.size(), "");
|
auto yy = vector<string>(lines.size(), "");
|
||||||
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
|
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
|
||||||
for (size_t i = 0; i < lines.size(); i++) {
|
for (size_t i = 0; i < lines.size(); i++) {
|
||||||
|
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "../typesFImdlp.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@@ -12,7 +13,7 @@ private:
|
|||||||
vector<pair<string, string>> attributes;
|
vector<pair<string, string>> attributes;
|
||||||
string className;
|
string className;
|
||||||
string classType;
|
string classType;
|
||||||
vector<vector<float>> X;
|
vector<mdlp::samples_t> X;
|
||||||
vector<int> y;
|
vector<int> y;
|
||||||
|
|
||||||
void generateDataset(bool);
|
void generateDataset(bool);
|
||||||
@@ -32,7 +33,7 @@ public:
|
|||||||
|
|
||||||
static string trim(const string &);
|
static string trim(const string &);
|
||||||
|
|
||||||
vector<vector<float>> &getX();
|
vector<mdlp::samples_t> &getX();
|
||||||
|
|
||||||
vector<int> &getY();
|
vector<int> &getY();
|
||||||
|
|
||||||
|
@@ -111,6 +111,16 @@ namespace mdlp {
|
|||||||
EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
|
EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(TestFImdlp, JoinFit) {
|
||||||
|
samples_t X_ = {1, 2, 2, 3, 4, 2, 3};
|
||||||
|
labels_t y_ = {0, 0, 1, 2, 3, 4, 5};
|
||||||
|
cutPoints_t expected = {1.5f, 2.5f};
|
||||||
|
fit(X_, y_);
|
||||||
|
auto computed = getCutPoints();
|
||||||
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
|
checkCutPoints(computed, expected);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, FitErrorMaxCutPoints) {
|
TEST_F(TestFImdlp, FitErrorMaxCutPoints) {
|
||||||
auto testmin = CPPFImdlp(2, 10, -1);
|
auto testmin = CPPFImdlp(2, 10, -1);
|
||||||
auto testmax = CPPFImdlp(3, 0, 200);
|
auto testmax = CPPFImdlp(3, 0, 200);
|
||||||
|
@@ -30,6 +30,14 @@ namespace mdlp {
|
|||||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(TestMetrics, EntropyDouble) {
|
||||||
|
y = {0, 0, 1, 2, 3};
|
||||||
|
samples_t expected_entropies = {0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898};
|
||||||
|
for (auto idx = 0; idx < y.size(); ++idx) {
|
||||||
|
ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(TestMetrics, InformationGain) {
|
TEST_F(TestMetrics, InformationGain) {
|
||||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
#ifndef TYPES_H
|
#ifndef TYPES_H
|
||||||
#define TYPES_H
|
#define TYPES_H
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
Reference in New Issue
Block a user