Compare commits

...

6 Commits

5 changed files with 113 additions and 14 deletions

View File

@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
# Boost Library
# -------------
# No COMPONENTS are requested, so only the Boost headers are located here.
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
# REQUIRED aborts the configure step when Boost >= 1.78.0 is not found,
# so a Boost_FOUND guard would never take its false branch.
find_package(Boost 1.78.0 REQUIRED)
message(STATUS "Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
# NOTE(review): directory-scoped include path; it applies to every target
# defined below this point. Consider linking the imported Boost target on the
# targets that need it instead.
include_directories(${Boost_INCLUDE_DIRS})
# NOTE(review): -pthread is appended to the global C++ flags; consider
# find_package(Threads) and Threads::Threads on the targets that need it.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMake modules
# -------------

View File

@@ -6,7 +6,7 @@
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
#include <boost/math/distributions/chi_squared.hpp>
namespace fs = std::filesystem;
@@ -200,6 +200,70 @@ namespace platform {
table["dateTable"] = ftime_to_string(maxDate);
return table;
}
// Rank the (name, value) pairs from best (highest value) to worst.
//
// ranksOrder is sorted in place by descending value. Every name first gets
// its 1-based position as rank; names whose values are equal then share the
// average of the positions they occupy.
//
// Returns a map from name to (possibly fractional) rank.
map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
{
    // Highest value first
    auto byValueDescending = [](const pair<string, double>& lhs, const pair<string, double>& rhs) {
        return lhs.second > rhs.second;
    };
    sort(ranksOrder.begin(), ranksOrder.end(), byValueDescending);
    const int total = static_cast<int>(ranksOrder.size());
    // First pass: provisional rank = 1-based position in the sorted vector
    map<string, float> ranks;
    int position = 0;
    for (const auto& entry : ranksOrder) {
        ranks[entry.first] = ++position;
    }
    // Second pass: walk over runs of equal values and average their ranks
    for (int first = 0, next = 0; first < total; first = next) {
        int rankSum = ranks[ranksOrder[first].first];
        next = first + 1;
        while (next < total && ranksOrder[first].second == ranksOrder[next].second) {
            rankSum += ranks[ranksOrder[next].first];
            ++next;
        }
        const int tied = next - first;
        if (tied == 1) {
            // A value without ties keeps its positional rank
            continue;
        }
        const float sharedRank = static_cast<float>(rankSum) / tied;
        for (int k = first; k < next; ++k) {
            ranks[ranksOrder[k].first] = sharedRank;
        }
    }
    return ranks;
}
// Print the result of a Friedman test computed from per-model rank sums.
//
// nModels      number of classifiers being compared (k)
// nDatasets    number of datasets the ranks were accumulated over (N)
// ranks        for each model, the sum of its ranks over all datasets
// significance alpha level used for the chi-squared critical value
//
// The statistic is Q = 12 / (N k (k + 1)) * sum(R_j^2) - 3 N (k + 1) and is
// compared against a chi-squared distribution with k - 1 degrees of freedom.
// Nothing is computed when there are fewer than 3 models or 3 datasets.
// The formatting state of cout is restored before returning.
void friedmanTest(int nModels, int nDatasets, map<string, float> ranks, double significance = 0.05)
{
    if (nModels < 3 || nDatasets < 3) {
        cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl;
        return;
    }
    // Remember the stream formatting so the manipulators used below do not leak to the caller
    const auto savedFlags = cout.flags();
    const auto savedPrecision = cout.precision();
    cout << Colors::BLUE() << endl;
    cout << "*************************************************************************************" << endl;
    cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
    // Work in double from the start so the products cannot overflow int
    const double k = nModels;
    const double n = nDatasets;
    const double degreesOfFreedom = k - 1.0;
    double sumSquared = 0.0;
    for (const auto& rank : ranks) {
        const double rankSum = rank.second;
        sumSquared += rankSum * rankSum;
    }
    double friedmanQ = 12.0 / (n * k * (k + 1.0)) * sumSquared - 3.0 * n * (k + 1.0);
    if (friedmanQ < 0.0) {
        // Rounding can push the statistic marginally below zero when all models tie;
        // the chi-squared cdf rejects negative arguments, so clamp it
        friedmanQ = 0.0;
    }
    cout << "Friedman statistic: " << friedmanQ << endl;
    // Calculate the critical value and the p-value
    boost::math::chi_squared chiSquared(degreesOfFreedom);
    // The complement avoids the cancellation of 1 - cdf(...) for large statistics
    const double p_value = boost::math::cdf(boost::math::complement(chiSquared, friedmanQ));
    const double criticalValue = boost::math::quantile(chiSquared, 1 - significance);
    std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
        << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
    cout << "p-value: " << scientific << p_value << endl;
    if (friedmanQ > criticalValue) {
        cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
    } else {
        cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
    }
    cout << Colors::BLUE() << "*************************************************************************************" << endl;
    cout.flags(savedFlags);
    cout.precision(savedPrecision);
}
void BestResults::printTableResults(set<string> models, json table)
{
cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
@@ -217,7 +281,8 @@ namespace platform {
auto i = 0;
bool odd = true;
map<string, double> totals;
map<string, int> ranks;
map<string, float> ranks;
map<string, float> ranksTotal;
for (const auto& model : models) {
totals[model] = 0.0;
}
@@ -236,13 +301,14 @@ namespace platform {
}
ranksOrder.push_back({ model, value });
}
// sort the ranksOrder vector by value
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
return a.second > b.second;
});
// Assign the ranks
for (int i = 0; i < ranksOrder.size(); i++) {
ranks[ranksOrder[i].first] = i + 1;
ranks = assignRanks(ranksOrder);
if (ranksTotal.size() == 0) {
ranksTotal = ranks;
} else {
for (const auto& rank : ranks) {
ranksTotal[rank.first] += rank.second;
}
}
// Print the row with red colors on max values
for (const auto& model : models) {
@@ -279,20 +345,32 @@ namespace platform {
// Output the averaged ranks
cout << endl;
int min = 1;
for (const auto& rank : ranks) {
for (const auto& rank : ranksTotal) {
if (rank.second < min) {
min = rank.second;
}
}
cout << Colors::BLUE() << setw(30) << " Ranks....................";
for (const auto& model : models) {
string efectiveColor = Colors::BLUE();
if (ranksTotal[model] == min) {
efectiveColor = Colors::RED();
}
cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " ";
}
cout << endl;
cout << Colors::GREEN() << setw(30) << " Averaged ranks...........";
for (const auto& model : models) {
string efectiveColor = Colors::GREEN();
if (ranks[model] == min) {
if (ranksTotal[model] == min) {
efectiveColor = Colors::RED();
}
cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " ";
cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " ";
}
cout << endl;
if (friedman) {
friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05);
}
}
void BestResults::reportAll()
{

View File

@@ -8,7 +8,7 @@ using json = nlohmann::json;
namespace platform {
class BestResults {
public:
explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
string build();
void reportSingle();
void reportAll();
@@ -23,6 +23,7 @@ namespace platform {
string path;
string score;
string model;
bool friedman;
};
}
#endif //BESTRESULTS_H

View File

@@ -12,8 +12,9 @@ add_executable(best best.cc BestResults.cc Result.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
target_link_libraries(best stdc++fs)
target_link_libraries(best Boost::boost stdc++fs)
else()
target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(best Boost::boost)
endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -13,12 +13,14 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
try {
program.parse_args(argc, argv);
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
if (model == "" || score == "") {
throw runtime_error("Model and score name must be supplied");
}
@@ -38,12 +40,18 @@ int main(int argc, char** argv)
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
if (friedman && model != "any") {
cerr << "Friedman test can only be used with all models" << endl;
cerr << program;
exit(1);
}
if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
cerr << program;
exit(1);
}
auto results = platform::BestResults(platform::Paths::results(), score, model);
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
if (build) {
if (model == "any") {
results.buildAll();