Update models versions

2024-06-13 12:30:31 +02:00
parent e628d80f4c
commit 3d900f8c81
10 changed files with 60 additions and 35 deletions
--- a/lib/Files
+++ b/lib/Files
--- a/src/common/Datasets.cpp
+++ b/src/common/Datasets.cpp
@@ -5,6 +5,14 @@
 namespace platform {
    using json = nlohmann::ordered_json;
    const std::string message_dataset_not_loaded = "dataset not loaded.";
+    Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) : // out-of-line definition; the default argument stays on the declaration in Datasets.h
+        discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
+    {
+        if (discretizer_algorithm == "none" && discretize) { // "none" is the declared default, i.e. the caller did not pick an algorithm
+            throw std::runtime_error("Can't discretize without discretization algorithm");
+        }
+        load(); // loading is triggered eagerly, as part of construction
+    }
    void Datasets::load()
    {
        auto sd = SourceData(sfileType);
--- a/src/common/Datasets.h
+++ b/src/common/Datasets.h
@@ -4,14 +4,7 @@
 namespace platform {
    class Datasets {
    public:
-        explicit Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm = "none") :
-            discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
-        {
-            if (discretizer_algorithm == "none" && discretize) {
-                throw std::runtime_error("Can't discretize without discretization algorithm");
-            }
-            load();
-        };
+        explicit Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm = "none");
        std::vector<std::string> getNames();
        bool isDataset(const std::string& name) const;
        Dataset& getDataset(const std::string& name) const { return *datasets.at(name); }
--- a/src/common/DotEnv.h
+++ b/src/common/DotEnv.h
@@ -19,8 +19,8 @@ namespace platform {
        {
            valid =
            {
-                {"source_data", {"Arff", "Tanveer", "Surcov"}},
-                {"experiment", {"discretiz", "odte", "covid"}},
+                {"source_data", {"Arff", "Tanveer", "Surcov", "Test"}},
+                {"experiment", {"discretiz", "odte", "covid", "Test"}},
                {"fit_features", {"0", "1"}},
                {"discretize", {"0", "1"}},
                {"ignore_nan", {"0", "1"}},
@@ -31,7 +31,7 @@ namespace platform {
                {"n_folds", {"5", "10"}},
                {"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q"}},
                {"smooth_strat", {"OLD_LAPLACE", "LAPLACE", "CESTNIK"}},
-                { "platform", {"any"} },
+                {"platform", {"any"}},
                {"model", {"any"}},
                {"seeds", {"any"}},
                {"nodes", {"any"}},
@@ -41,8 +41,25 @@ namespace platform {
            if (create) {
                // For testing purposes
                std::ofstream file(".env");
-                file << "source_data = Test" << std::endl;
-                file << "margin = 0.1" << std::endl;
+                file << "experiment=Test" << std::endl;
+                file << "source_data=Test" << std::endl;
+                file << "margin=0.1" << std::endl;
+                file << "score=accuracy" << std::endl;
+                file << "platform=um790Linux" << std::endl;
+                file << "n_folds=5" << std::endl;
+                file << "discretize_algo=mdlp" << std::endl;
+                file << "smooth_strat=OLD_LAPLACE" << std::endl;
+                file << "stratified=0" << std::endl;
+                file << "model=TAN" << std::endl;
+                file << "seeds=[271]" << std::endl;
+                file << "discretize=0" << std::endl;
+                file << "ignore_nan=0" << std::endl;
+                file << "nodes=Nodes" << std::endl;
+                file << "leaves=Edges" << std::endl;
+                file << "depth=States" << std::endl;
+                file << "fit_features=0" << std::endl;
+                file << "framework=bulma" << std::endl;
+                file << "margin=0.1" << std::endl;
                file.close();
            }
            std::ifstream file(".env");
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -13,10 +13,10 @@ if(ENABLE_TESTING)
    )
    set(TEST_SOURCES_PLATFORM 
        TestUtils.cpp TestPlatform.cpp TestResult.cpp TestScores.cpp 
-        ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp
+        ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp ${Platform_SOURCE_DIR}/src/common/Discretization.cpp
        ${Platform_SOURCE_DIR}/src/main/Scores.cpp
    )
    add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
-    target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain BayesNet)
+    target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" mdlp Catch2::Catch2WithMain BayesNet)
    add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
 endif(ENABLE_TESTING)
--- a/tests/TestPlatform.cpp
+++ b/tests/TestPlatform.cpp
@@ -7,6 +7,7 @@
 #include <string>
 #include "TestUtils.h"
 #include "folding.hpp"
+#include <ArffFiles.hpp>
 #include <bayesnet/classifiers/TAN.h>
 #include "config.h"

@@ -14,7 +15,7 @@
 TEST_CASE("Test Platform version", "[Platform]")
 {
    std::string version = { platform_project_version.begin(), platform_project_version.end() };
-    REQUIRE(version == "1.0.4");
+    REQUIRE(version == "1.1.0");
 }
 TEST_CASE("Test Folding library version", "[Folding]")
 {
@@ -24,10 +25,15 @@ TEST_CASE("Test Folding library version", "[Folding]")
 TEST_CASE("Test BayesNet version", "[BayesNet]")
 {
    std::string version = bayesnet::TAN().getVersion();
-    REQUIRE(version == "1.0.5.1");
+    REQUIRE(version == "1.0.6");
 }
 TEST_CASE("Test mdlp version", "[mdlp]")
 {
    std::string version = mdlp::CPPFImdlp::version();
-    REQUIRE(version == "1.1.2");
+    REQUIRE(version == "1.2.1");
+}
+TEST_CASE("Test Arff version", "[Arff]")
+{
+    std::string version = ArffFiles().version();
+    REQUIRE(version == "1.1.0");
 }
--- a/tests/TestResult.cpp
+++ b/tests/TestResult.cpp
@@ -13,13 +13,14 @@
 TEST_CASE("ZeroR comparison in reports", "[Report]")
 {
    auto dotEnv = platform::DotEnv(true);
-    auto margin = 1e-2;
-    std::string dataset = "liver-disorders";
+    auto margin = 1e-4;
+    std::string dataset_name = "liver-disorders";
    auto dt = platform::Datasets(false, platform::Paths::datasets());
-    dt.loadDataset(dataset);
-    std::vector<int> distribution = dt.getClassesCounts(dataset);
-    double nSamples = dt.getNSamples(dataset);
+    auto& dataset = dt.getDataset(dataset_name);
+    dataset.load();
+    std::vector<int> distribution = dataset.getClassesCounts();
+    double nSamples = dataset.getNSamples();
    std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
    double mark = *maxValue / nSamples * (1 + margin);
-    REQUIRE(mark == Catch::Approx(0.585507f).epsilon(1e-5));
+    REQUIRE(mark == Catch::Approx(0.57976811f).epsilon(margin));
 }
--- a/tests/TestScores.cpp
+++ b/tests/TestScores.cpp
@@ -128,7 +128,7 @@ TEST_CASE("Confusion Matrix JSON", "[Scores]")
    REQUIRE(res_json_str["Car"][1] == 2);
    REQUIRE(res_json_str["Car"][2] == 3);
 }
-TEST_CASE("Classification Report", "[Scores]") -
+TEST_CASE("Classification Report", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
--- a/tests/TestUtils.h
+++ b/tests/TestUtils.h
@@ -5,7 +5,7 @@
 #include <vector>
 #include <map>
 #include <tuple>
-#include "ArffFiles.h"
+#include <ArffFiles.hpp>
 #include "CPPFImdlp.h"

 bool file_exists(const std::string& name);
--- a/tests/data/all.txt
+++ b/tests/data/all.txt
@@ -1,8 +1,8 @@
-diabetes,class, all
-ecoli,class, all
-glass,Type, all
-iris,class, all
-kdd_JapaneseVowels,speaker, [2,3,4,5,6,7,8,9,10,11,12,13]
-letter,class, all
-liver-disorders,selector, all
-mfeat-factors,class, all
+diabetes;class;all
+ecoli;class;all
+glass;Type;all
+iris;class;all
+kdd_JapaneseVowels;speaker;[2,3,4,5,6,7,8,9,10,11,12,13]
+letter;class;all
+liver-disorders;selector;all
+mfeat-factors;class;all