From 3d900f8c815a9af1c81de6c7ae3ecfcb96c471a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@gmail.com>
Date: Thu, 13 Jun 2024 12:30:31 +0200
Subject: [PATCH] Update model versions

---
 lib/Files               |  2 +-
 src/common/Datasets.cpp |  8 ++++++++
 src/common/Datasets.h   |  9 +--------
 src/common/DotEnv.h     | 27 ++++++++++++++++++++++-----
 tests/CMakeLists.txt    |  4 ++--
 tests/TestPlatform.cpp  | 12 +++++++++---
 tests/TestResult.cpp    | 13 +++++++------
 tests/TestScores.cpp    |  2 +-
 tests/TestUtils.h       |  2 +-
 tests/data/all.txt      | 16 ++++++++--------
 10 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/lib/Files b/lib/Files
index dbefa02..a531692 160000
--- a/lib/Files
+++ b/lib/Files
@@ -1 +1 @@
-Subproject commit dbefa02d9c0ca0f029f77e744cd80cb0150725c8
+Subproject commit a5316928d408266aa425f64131ab0f592b010a8d
diff --git a/src/common/Datasets.cpp b/src/common/Datasets.cpp
index 45a73e6..5bd3850 100644
--- a/src/common/Datasets.cpp
+++ b/src/common/Datasets.cpp
@@ -5,6 +5,14 @@
 namespace platform {
     using json = nlohmann::ordered_json;
     const std::string message_dataset_not_loaded = "dataset not loaded.";
+    Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
+        discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
+    {
+        if (discretizer_algorithm == "none" && discretize) {
+            throw std::runtime_error("Can't discretize without discretization algorithm");
+        }
+        load();
+    }
     void Datasets::load()
     {
         auto sd = SourceData(sfileType);
diff --git a/src/common/Datasets.h b/src/common/Datasets.h
index 35c58e7..f0dc37f 100644
--- a/src/common/Datasets.h
+++ b/src/common/Datasets.h
@@ -4,14 +4,7 @@
 namespace platform {
     class Datasets {
     public:
-        explicit Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm = "none") :
-            discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
-        {
-            if (discretizer_algorithm == "none" && discretize) {
-                throw std::runtime_error("Can't discretize without discretization algorithm");
-            }
-            load();
-        };
+        explicit Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm = "none");
         std::vector<std::string> getNames();
         bool isDataset(const std::string& name) const;
         Dataset& getDataset(const std::string& name) const { return *datasets.at(name); }
diff --git a/src/common/DotEnv.h b/src/common/DotEnv.h
index 78b24f9..6ad2dea 100644
--- a/src/common/DotEnv.h
+++ b/src/common/DotEnv.h
@@ -19,8 +19,8 @@ namespace platform {
         {
             valid =
             {
-                {"source_data", {"Arff", "Tanveer", "Surcov"}},
-                {"experiment", {"discretiz", "odte", "covid"}},
+                {"source_data", {"Arff", "Tanveer", "Surcov", "Test"}},
+                {"experiment", {"discretiz", "odte", "covid", "Test"}},
                 {"fit_features", {"0", "1"}},
                 {"discretize", {"0", "1"}},
                 {"ignore_nan", {"0", "1"}},
@@ -31,7 +31,7 @@ namespace platform {
                 {"n_folds", {"5", "10"}},
                 {"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q"}},
                 {"smooth_strat", {"OLD_LAPLACE", "LAPLACE", "CESTNIK"}},
-                { "platform", {"any"} },
+                {"platform", {"any"}},
                 {"model", {"any"}},
                 {"seeds", {"any"}},
                 {"nodes", {"any"}},
@@ -41,8 +41,25 @@ namespace platform {
             if (create) {
                 // For testing purposes
                 std::ofstream file(".env");
-                file << "source_data = Test" << std::endl;
-                file << "margin = 0.1" << std::endl;
+                file << "experiment=Test" << std::endl;
+                file << "source_data=Test" << std::endl;
+                file << "margin=0.1" << std::endl;
+                file << "score=accuracy" << std::endl;
+                file << "platform=um790Linux" << std::endl;
+                file << "n_folds=5" << std::endl;
+                file << "discretize_algo=mdlp" << std::endl;
+                file << "smooth_strat=OLD_LAPLACE" << std::endl;
+                file << "stratified=0" << std::endl;
+                file << "model=TAN" << std::endl;
+                file << "seeds=[271]" << std::endl;
+                file << "discretize=0" << std::endl;
+                file << "ignore_nan=0" << std::endl;
+                file << "nodes=Nodes" << std::endl;
+                file << "leaves=Edges" << std::endl;
+                file << "depth=States" << std::endl;
+                file << "fit_features=0" << std::endl;
+                file << "framework=bulma" << std::endl;
+                file << "margin=0.1" << std::endl;
                 file.close();
             }
             std::ifstream file(".env");
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7074b4a..d6d9384 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -13,10 +13,10 @@ if(ENABLE_TESTING)
     )
     set(TEST_SOURCES_PLATFORM 
         TestUtils.cpp TestPlatform.cpp TestResult.cpp TestScores.cpp 
-        ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp
+        ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp ${Platform_SOURCE_DIR}/src/common/Discretization.cpp
         ${Platform_SOURCE_DIR}/src/main/Scores.cpp
     )
     add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
-    target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain BayesNet)
+    target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" mdlp Catch2::Catch2WithMain BayesNet)
     add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
 endif(ENABLE_TESTING)
diff --git a/tests/TestPlatform.cpp b/tests/TestPlatform.cpp
index 25d810b..7be2597 100644
--- a/tests/TestPlatform.cpp
+++ b/tests/TestPlatform.cpp
@@ -7,6 +7,7 @@
 #include <string>
 #include "TestUtils.h"
 #include "folding.hpp"
+#include <ArffFiles.hpp>
 #include <bayesnet/classifiers/TAN.h>
 #include "config.h"
 
@@ -14,7 +15,7 @@
 TEST_CASE("Test Platform version", "[Platform]")
 {
     std::string version = { platform_project_version.begin(), platform_project_version.end() };
-    REQUIRE(version == "1.0.4");
+    REQUIRE(version == "1.1.0");
 }
 TEST_CASE("Test Folding library version", "[Folding]")
 {
@@ -24,10 +25,15 @@ TEST_CASE("Test Folding library version", "[Folding]")
 TEST_CASE("Test BayesNet version", "[BayesNet]")
 {
     std::string version = bayesnet::TAN().getVersion();
-    REQUIRE(version == "1.0.5.1");
+    REQUIRE(version == "1.0.6");
 }
 TEST_CASE("Test mdlp version", "[mdlp]")
 {
     std::string version = mdlp::CPPFImdlp::version();
-    REQUIRE(version == "1.1.2");
+    REQUIRE(version == "1.2.1");
+}
+TEST_CASE("Test Arff version", "[Arff]")
+{
+    std::string version = ArffFiles().version();
+    REQUIRE(version == "1.1.0");
 }
\ No newline at end of file
diff --git a/tests/TestResult.cpp b/tests/TestResult.cpp
index 1c96bd6..8c978a7 100644
--- a/tests/TestResult.cpp
+++ b/tests/TestResult.cpp
@@ -13,13 +13,14 @@
 TEST_CASE("ZeroR comparison in reports", "[Report]")
 {
     auto dotEnv = platform::DotEnv(true);
-    auto margin = 1e-2;
-    std::string dataset = "liver-disorders";
+    auto margin = 1e-4;
+    std::string dataset_name = "liver-disorders";
     auto dt = platform::Datasets(false, platform::Paths::datasets());
-    dt.loadDataset(dataset);
-    std::vector<int> distribution = dt.getClassesCounts(dataset);
-    double nSamples = dt.getNSamples(dataset);
+    auto& dataset = dt.getDataset(dataset_name);
+    dataset.load();
+    std::vector<int> distribution = dataset.getClassesCounts();
+    double nSamples = dataset.getNSamples();
     std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
     double mark = *maxValue / nSamples * (1 + margin);
-    REQUIRE(mark == Catch::Approx(0.585507f).epsilon(1e-5));
+    REQUIRE(mark == Catch::Approx(0.57976811f).epsilon(margin));
 }
\ No newline at end of file
diff --git a/tests/TestScores.cpp b/tests/TestScores.cpp
index 2cdd805..c920c9a 100644
--- a/tests/TestScores.cpp
+++ b/tests/TestScores.cpp
@@ -128,7 +128,7 @@ TEST_CASE("Confusion Matrix JSON", "[Scores]")
     REQUIRE(res_json_str["Car"][1] == 2);
     REQUIRE(res_json_str["Car"][2] == 3);
 }
-TEST_CASE("Classification Report", "[Scores]") -
+TEST_CASE("Classification Report", "[Scores]")
 {
     std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
     std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
diff --git a/tests/TestUtils.h b/tests/TestUtils.h
index d0cf4d9..f984905 100644
--- a/tests/TestUtils.h
+++ b/tests/TestUtils.h
@@ -5,7 +5,7 @@
 #include <vector>
 #include <map>
 #include <tuple>
-#include "ArffFiles.h"
+#include <ArffFiles.hpp>
 #include "CPPFImdlp.h"
 
 bool file_exists(const std::string& name);
diff --git a/tests/data/all.txt b/tests/data/all.txt
index 6963a58..aa16f10 100644
--- a/tests/data/all.txt
+++ b/tests/data/all.txt
@@ -1,8 +1,8 @@
-diabetes,class, all
-ecoli,class, all
-glass,Type, all
-iris,class, all
-kdd_JapaneseVowels,speaker, [2,3,4,5,6,7,8,9,10,11,12,13]
-letter,class, all
-liver-disorders,selector, all
-mfeat-factors,class, all
\ No newline at end of file
+diabetes;class;all
+ecoli;class;all
+glass;Type;all
+iris;class;all
+kdd_JapaneseVowels;speaker;[2,3,4,5,6,7,8,9,10,11,12,13]
+letter;class;all
+liver-disorders;selector;all
+mfeat-factors;class;all
\ No newline at end of file