mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 15:35:55 +00:00
refactor system types in library
Add new test taken from join_fit in FImdlp Python. Update instructions in README.
This commit is contained in:
@@ -128,8 +128,8 @@ namespace mdlp {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / static_cast<float>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<float>(elements) * metrics.entropy(idx, end);
|
||||
entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
@@ -155,8 +155,8 @@ namespace mdlp {
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
@@ -24,9 +24,8 @@ To run the sample, just execute the following commands:
|
||||
|
||||
```bash
|
||||
cd sample
|
||||
mkdir build
|
||||
cmake -B build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
./sample -f iris -m 2
|
||||
./sample -h
|
||||
@@ -34,7 +33,7 @@ make
|
||||
|
||||
## Test
|
||||
|
||||
To run the tests, execute the following commands:
|
||||
To run the tests and see the coverage report (llvm and gcovr must be installed), execute the following commands:
|
||||
|
||||
```bash
|
||||
cd tests
|
||||
|
@@ -27,7 +27,7 @@ string ArffFiles::getClassType() const {
|
||||
return classType;
|
||||
}
|
||||
|
||||
vector<vector<float>> &ArffFiles::getX() {
|
||||
vector<mdlp::samples_t> &ArffFiles::getX() {
|
||||
return X;
|
||||
}
|
||||
|
||||
@@ -80,7 +80,7 @@ void ArffFiles::load(const string &fileName, bool classLast) {
|
||||
}
|
||||
|
||||
void ArffFiles::generateDataset(bool classLast) {
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
X = vector<mdlp::samples_t>(attributes.size(), mdlp::samples_t(lines.size()));
|
||||
auto yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "../typesFImdlp.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -12,7 +13,7 @@ private:
|
||||
vector<pair<string, string>> attributes;
|
||||
string className;
|
||||
string classType;
|
||||
vector<vector<float>> X;
|
||||
vector<mdlp::samples_t> X;
|
||||
vector<int> y;
|
||||
|
||||
void generateDataset(bool);
|
||||
@@ -32,7 +33,7 @@ public:
|
||||
|
||||
static string trim(const string &);
|
||||
|
||||
vector<vector<float>> &getX();
|
||||
vector<mdlp::samples_t> &getX();
|
||||
|
||||
vector<int> &getY();
|
||||
|
||||
|
@@ -111,6 +111,16 @@ namespace mdlp {
|
||||
EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
|
||||
}
|
||||
|
||||
// Fits a small sample in which repeated feature values (2 and 3) carry
// different class labels, then verifies both the number and the values of
// the cut points produced by the discretizer.
TEST_F(TestFImdlp, JoinFit) {
    cutPoints_t wanted = {1.5f, 2.5f};
    labels_t labels = {0, 0, 1, 2, 3, 4, 5};
    samples_t values = {1, 2, 2, 3, 4, 2, 3};
    fit(values, labels);
    auto obtained = getCutPoints();
    // Compare sizes first so a length mismatch is reported explicitly.
    EXPECT_EQ(obtained.size(), wanted.size());
    checkCutPoints(obtained, wanted);
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorMaxCutPoints) {
|
||||
auto testmin = CPPFImdlp(2, 10, -1);
|
||||
auto testmax = CPPFImdlp(3, 0, 200);
|
||||
|
@@ -30,6 +30,14 @@ namespace mdlp {
|
||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||
}
|
||||
|
||||
// Checks entropy() over growing prefixes [0, idx + 1) of a label vector
// against precomputed reference values.
TEST_F(TestMetrics, EntropyDouble) {
    y = {0, 0, 1, 2, 3};
    // NOTE(review): the plain Shannon entropy of {0, 0, 1, 2, 3} is ~1.9219,
    // not 1.4575; the last expectation presumably depends on fixture state
    // initialised outside this test -- confirm against the fixture setup.
    samples_t expected_entropies = {0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898};
    // Unsigned index: avoids comparing a signed int with the unsigned size.
    for (size_t idx = 0; idx < y.size(); ++idx) {
        ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
    }
}
|
||||
|
||||
TEST_F(TestMetrics, InformationGain) {
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
Reference in New Issue
Block a user