Update boost version requirement for Linux

Add friedman hyperparameter
Add boost library link to linux build
2023-09-26 14:12:53 +02:00 · 2023-09-26 11:26:59 +02:00 · 2023-09-26 01:07:50 +02:00 · 2023-09-26 01:04:59 +02:00 · 2023-09-25 18:38:12 +02:00 · 2023-09-25 12:02:17 +00:00
5 changed files with 113 additions and 14 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 option(ENABLE_CLANG_TIDY "Enable to add clang tidy."              OFF)
 option(ENABLE_TESTING "Unit testing build"                        OFF)
 option(CODE_COVERAGE "Collect coverage from test library"         OFF)
+
+# Boost Library
+set(Boost_USE_STATIC_LIBS OFF) 
+set(Boost_USE_MULTITHREADED ON)  
+set(Boost_USE_STATIC_RUNTIME OFF) 
+find_package(Boost 1.78.0 REQUIRED) 
+if(Boost_FOUND)
+    message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
+    include_directories(${Boost_INCLUDE_DIRS}) 
+endif()
+
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
 # CMakes modules
 # --------------
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -6,7 +6,7 @@
 #include "BestResults.h"
 #include "Result.h"
 #include "Colors.h"
-
+#include <boost/math/distributions/chi_squared.hpp>


 namespace fs = std::filesystem;
@@ -200,6 +200,70 @@ namespace platform {
        table["dateTable"] = ftime_to_string(maxDate);
        return table;
    }
+    /**
+     * Rank the models of one dataset by score, best (highest) score first.
+     *
+     * Rank 1 goes to the highest value. Models whose values compare equal
+     * share the average of the positions they occupy (e.g. two models tied
+     * for positions 2 and 3 both get 2.5).
+     *
+     * @param ranksOrder (model name, score) pairs; sorted in place in
+     *                   descending score order as a side effect.
+     * @return map from model name to its (possibly fractional) rank.
+     */
+    map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
+    {
+        // Highest score first, so position 0 corresponds to rank 1
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
+            return a.second > b.second;
+            });
+        map<string, float> ranks;
+        const size_t total = ranksOrder.size();
+        size_t first = 0;
+        while (first < total) {
+            // [first, last) is the run of entries sharing the same score
+            size_t last = first + 1;
+            while (last < total && ranksOrder[last].second == ranksOrder[first].second) {
+                ++last;
+            }
+            // The run occupies the 1-based positions first+1 .. last, whose mean is
+            // (first + 1 + last) / 2; for a run of one element this is just first + 1
+            const float averageRank = static_cast<float>(first + 1 + last) / 2.0f;
+            for (size_t k = first; k < last; ++k) {
+                ranks[ranksOrder[k].first] = averageRank;
+            }
+            first = last;
+        }
+        return ranks;
+    }
+    /**
+     * Run the Friedman test on the accumulated ranks and print the outcome.
+     *
+     * Prints the Friedman statistic, the chi-squared critical value for
+     * nModels - 1 degrees of freedom, the p-value and whether H0 is rejected.
+     * Nothing is computed when there are fewer than 3 models or 3 datasets.
+     *
+     * @param nModels      number of models (k) being compared.
+     * @param nDatasets    number of datasets (N) the ranks were accumulated over.
+     * @param ranks        per-model rank values whose squares enter the statistic
+     *                     (the caller passes the ranks summed over the datasets).
+     * @param significance significance level alpha used for the critical value.
+     */
+    void friedmanTest(int nModels, int nDatasets, map<string, float> ranks, double significance = 0.05)
+    {
+        if (nModels < 3 || nDatasets < 3) {
+            cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl;
+            return;
+        }
+        // The manipulators used below (fixed/scientific/setprecision) are sticky;
+        // remember the current formatting so it can be restored before returning
+        const auto savedFlags = cout.flags();
+        const auto savedPrecision = cout.precision();
+        cout << Colors::BLUE() << endl;
+        cout << "*************************************************************************************" << endl;
+        cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
+        // Sum of squared ranks, accumulated in double precision
+        double sumSquared = 0.0;
+        for (const auto& rank : ranks) {
+            const double value = rank.second;
+            sumSquared += value * value;
+        }
+        // Q = 12 / (N k (k + 1)) * sum(R_j^2) - 3 N (k + 1), evaluated in floating
+        // point so the products cannot overflow int
+        const double k = nModels;
+        const double n = nDatasets;
+        const double friedmanQ = 12.0 / (k * n * (k + 1.0)) * sumSquared - 3.0 * n * (k + 1.0);
+        cout << "Friedman statistic: " << friedmanQ << endl;
+        // Calculate the critical value
+        const int degreesOfFreedom = nModels - 1;
+        boost::math::chi_squared chiSquared(degreesOfFreedom);
+        // Use the complement instead of 1 - cdf: the subtraction cancels to 0 for
+        // large statistics, while the complement keeps the small tail probability
+        const double p_value = cdf(boost::math::complement(chiSquared, friedmanQ));
+        const double criticalValue = quantile(chiSquared, 1 - significance);
+        std::cout << "Critical Chi-Square Value for df=" << fixed << degreesOfFreedom
+            << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
+        cout << "p-value: " << scientific << p_value << endl;
+        if (friedmanQ > criticalValue) {
+            cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
+        } else {
+            cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
+        }
+        cout << Colors::BLUE() << "*************************************************************************************" << endl;
+        // Hand the stream back with the formatting the caller had
+        cout.flags(savedFlags);
+        cout.precision(savedPrecision);
+    }
    void BestResults::printTableResults(set<string> models, json table)
    {
        cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
@@ -217,7 +281,8 @@ namespace platform {
        auto i = 0;
        bool odd = true;
        map<string, double> totals;
-        map<string, int> ranks;
+        map<string, float> ranks;
+        map<string, float> ranksTotal;
        for (const auto& model : models) {
            totals[model] = 0.0;
        }
@@ -236,13 +301,14 @@ namespace platform {
                }
                ranksOrder.push_back({ model, value });
            }
-            // sort the ranksOrder vector by value
-            sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
-                return a.second > b.second;
-                });
            // Assign the ranks
-            for (int i = 0; i < ranksOrder.size(); i++) {
-                ranks[ranksOrder[i].first] = i + 1;
+            ranks = assignRanks(ranksOrder);
+            if (ranksTotal.size() == 0) {
+                ranksTotal = ranks;
+            } else {
+                for (const auto& rank : ranks) {
+                    ranksTotal[rank.first] += rank.second;
+                }
            }
            // Print the row with red colors on max values
            for (const auto& model : models) {
@@ -279,20 +345,32 @@ namespace platform {
        // Output the averaged ranks
        cout << endl;
        int min = 1;
-        for (const auto& rank : ranks) {
+        for (const auto& rank : ranksTotal) {
            if (rank.second < min) {
                min = rank.second;
            }
        }
+        cout << Colors::BLUE() << setw(30) << "    Ranks....................";
+        for (const auto& model : models) {
+            string efectiveColor = Colors::BLUE();
+            if (ranksTotal[model] == min) {
+                efectiveColor = Colors::RED();
+            }
+            cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " ";
+        }
+        cout << endl;
        cout << Colors::GREEN() << setw(30) << "    Averaged ranks...........";
        for (const auto& model : models) {
            string efectiveColor = Colors::GREEN();
-            if (ranks[model] == min) {
+            if (ranksTotal[model] == min) {
                efectiveColor = Colors::RED();
            }
-            cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " ";
+            cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " ";
        }
        cout << endl;
+        if (friedman) {
+            friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05);
+        }
    }
    void BestResults::reportAll()
    {
--- a/src/Platform/BestResults.h
+++ b/src/Platform/BestResults.h
@@ -8,7 +8,7 @@ using json = nlohmann::json;
 namespace platform {
    class BestResults {
    public:
-        explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
+        explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
        string build();
        void reportSingle();
        void reportAll();
@@ -23,6 +23,7 @@ namespace platform {
        string path;
        string score;
        string model;
+        bool friedman;
    };
 }
 #endif //BESTRESULTS_H
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@@ -12,8 +12,9 @@ add_executable(best best.cc BestResults.cc Result.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
    target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
-    target_link_libraries(best stdc++fs)
+    target_link_libraries(best Boost::boost stdc++fs)
 else()
    target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
+    target_link_libraries(best Boost::boost)
 endif()
 target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
--- a/src/Platform/best.cc
+++ b/src/Platform/best.cc
@@ -13,12 +13,14 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
+    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        auto model = program.get<string>("model");
        auto score = program.get<string>("score");
        auto build = program.get<bool>("build");
        auto report = program.get<bool>("report");
+        auto friedman = program.get<bool>("friedman");
        if (model == "" || score == "") {
            throw runtime_error("Model and score name must be supplied");
        }
@@ -38,12 +40,18 @@ int main(int argc, char** argv)
    auto score = program.get<string>("score");
    auto build = program.get<bool>("build");
    auto report = program.get<bool>("report");
+    auto friedman = program.get<bool>("friedman");
+    if (friedman && model != "any") {
+        cerr << "Friedman test can only be used with all models" << endl;
+        cerr << program;
+        exit(1);
+    }
    if (!report && !build) {
        cerr << "Either build, report or both, have to be selected to do anything!" << endl;
        cerr << program;
        exit(1);
    }
-    auto results = platform::BestResults(platform::Paths::results(), score, model);
+    auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
    if (build) {
        if (model == "any") {
            results.buildAll();
Author	SHA1	Message	Date
Ricardo Montañana	ce66483b65	Update boost version requirement for Linux	2023-09-26 14:12:53 +02:00
Ricardo Montañana	cab8e14b2d	Add friedman hyperparameter	2023-09-26 11:26:59 +02:00
Ricardo Montañana	f0d0abe891	Add boost library link to linux build	2023-09-26 01:07:50 +02:00
Ricardo Montañana	dcba146e12	Begin adding Friedman test to BestResults	2023-09-26 01:04:59 +02:00
Ricardo Montañana	3ea0285119	Fix ranks to match friedman test ranks	2023-09-25 18:38:12 +02:00
Ricardo Montañana Gómez	e3888e1503	Merge pull request 'bestResults' (#9 ) from bestResults into main Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9 Add best results management, build, report, build all & report all	2023-09-25 12:02:17 +00:00