Add excel to best report of model

This commit is contained in:
Ricardo Montañana Gómez 2023-10-19 18:12:55 +02:00
parent 64fc97b892
commit 38423048bd
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
5 changed files with 159 additions and 54 deletions

View File

@ -27,7 +27,6 @@ std::string ftime_to_string(TP tp)
return buffer.str();
}
namespace platform {
string BestResults::build()
{
auto files = loadResultFiles();
@ -65,12 +64,10 @@ namespace platform {
file.close();
return bestFileName;
}
// Compose the bare file name (no directory) of the best-results file for the
// current score and model: "best_results_<score>_<model>.json".
string BestResults::bestResultFile()
{
    string fileName = "best_results_";
    fileName += score;
    fileName += "_";
    fileName += model;
    fileName += ".json";
    return fileName;
}
pair<string, string> getModelScore(string name)
{
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
@ -82,7 +79,6 @@ namespace platform {
string model = name.substr(pos2 + 1, pos - pos2 - 1);
return { model, score };
}
vector<string> BestResults::loadResultFiles()
{
vector<string> files;
@ -99,7 +95,6 @@ namespace platform {
}
return files;
}
json BestResults::loadFile(const string& fileName)
{
ifstream resultData(fileName);
@ -136,7 +131,6 @@ namespace platform {
}
return datasets;
}
void BestResults::buildAll()
{
auto models = getModels();
@ -147,8 +141,7 @@ namespace platform {
}
model = "any";
}
void BestResults::reportSingle()
void BestResults::listFile()
{
string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
@ -162,22 +155,29 @@ namespace platform {
auto data = loadFile(bestFileName);
auto datasets = getDatasets(data);
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
cout << "--------------------------------------------------------" << endl;
stringstream oss;
oss << Colors::GREEN() << "Best results for " << model << " as of " << date << endl;
cout << oss.str();
cout << string(oss.str().size() - 8, '-') << endl;
cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset") << "Score File Hyperparameters" << endl;
cout << "=== " << string(maxDatasetName, '=') << " =========== ================================================================== ================================================= " << endl;
auto i = 0;
bool odd = true;
double total = 0;
for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
double value = item.value().at(0).get<double>();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << setw(maxDatasetName) << left << item.key() << " ";
cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
cout << setw(11) << setprecision(9) << fixed << value << " ";
cout << setw(66) << item.value().at(2).get<string>() << " ";
cout << item.value().at(1) << " ";
cout << endl;
total += value;
odd = !odd;
}
cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " ===========" << endl;
cout << setw(5 + maxDatasetName) << "Total.................. " << setw(11) << setprecision(8) << fixed << total << endl;
}
json BestResults::buildTableResults(vector<string> models)
{
@ -202,11 +202,12 @@ namespace platform {
table["dateTable"] = ftime_to_string(maxDate);
return table;
}
void BestResults::printTableResults(vector<string> models, json table)
{
cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
cout << "------------------------------------------------" << endl;
stringstream oss;
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
cout << oss.str();
cout << string(oss.str().size() - 8, '-') << endl;
cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset");
for (const auto& model : models) {
cout << setw(maxModelName) << left << model << " ";
@ -271,6 +272,19 @@ namespace platform {
}
cout << endl;
}
void BestResults::reportSingle(bool excel)
{
listFile();
if (excel) {
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
vector<string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel(score, datasets);
excel.reportSingle(model, path + bestResultFile());
messageExcelFile(excel.getFileName());
}
}
void BestResults::reportAll(bool excel)
{
auto models = getModels();
@ -292,9 +306,32 @@ namespace platform {
ranksModels = stats.getRanks();
}
if (excel) {
BestResultsExcel excel(score, models, datasets, table, ranksModels, friedman, significance);
excel.build();
cout << Colors::YELLOW() << "** Excel file generated: " << excel.getFileName() << Colors::RESET() << endl;
BestResultsExcel excel(score, datasets);
excel.reportAll(models, table, ranksModels, friedman, significance);
if (friedman) {
int idx = -1;
double min = 2000;
// Find out the control model
auto totals = vector<double>(models.size(), 0.0);
for (const auto& dataset : datasets) {
for (int i = 0; i < models.size(); ++i) {
totals[i] += ranksModels[dataset][models[i]];
}
}
for (int i = 0; i < models.size(); ++i) {
if (totals[i] < min) {
min = totals[i];
idx = i;
}
}
model = models.at(idx);
excel.reportSingle(model, path + bestResultFile());
}
messageExcelFile(excel.getFileName());
}
}
// Write a one-line notice to stdout naming the Excel file that was generated.
// The text is emitted between Colors::YELLOW() and Colors::RESET().
void BestResults::messageExcelFile(const string& fileName)
{
    cout << Colors::YELLOW() << "** Excel file generated: " << fileName;
    cout << Colors::RESET() << endl;
}
}

View File

@ -7,19 +7,24 @@ using json = nlohmann::json;
namespace platform {
class BestResults {
public:
explicit BestResults(const string& path, const string& score, const string& model, bool friedman, double significance = 0.05) : path(path), score(score), model(model), friedman(friedman), significance(significance) {}
explicit BestResults(const string& path, const string& score, const string& model, bool friedman, double significance = 0.05)
: path(path), score(score), model(model), friedman(friedman), significance(significance)
{
}
string build();
void reportSingle();
void reportSingle(bool excel);
void reportAll(bool excel);
void buildAll();
private:
vector<string> getModels();
vector<string> getDatasets(json table);
vector<string> loadResultFiles();
void messageExcelFile(const string& fileName);
json buildTableResults(vector<string> models);
void printTableResults(vector<string> models, json table);
string bestResultFile();
json loadFile(const string& fileName);
void listFile();
string path;
string score;
string model;

View File

@ -4,18 +4,87 @@
#include "Statistics.h"
namespace platform {
BestResultsExcel::BestResultsExcel(const string& score, const vector<string>& models, const vector<string>& datasets, const json& table, const map<string, map<string, float>>& ranksModels, bool friedman, double significance) :
score(score), models(models), datasets(datasets), table(table), ranksModels(ranksModels), friedman(friedman), significance(significance)
BestResultsExcel::BestResultsExcel(const string& score, const vector<string>& datasets) : score(score), datasets(datasets)
{
workbook = workbook_new((Paths::excel() + fileName).c_str());
worksheet = workbook_add_worksheet(workbook, "Best Results");
setProperties("Best Results");
createFormats();
int maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
modelNameSize = max(modelNameSize, maxModelName);
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
datasetNameSize = max(datasetNameSize, maxDatasetName);
createFormats();
}
void BestResultsExcel::reportAll(const vector<string>& models, const json& table, const map<string, map<string, float>>& ranks, bool friedman, double significance)
{
this->table = table;
this->models = models;
ranksModels = ranks;
this->friedman = friedman;
this->significance = significance;
worksheet = workbook_add_worksheet(workbook, "Best Results");
int maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
modelNameSize = max(modelNameSize, maxModelName);
formatColumns();
build();
}
void BestResultsExcel::reportSingle(const string& model, const string& fileName)
{
worksheet = workbook_add_worksheet(workbook, "Report");
if (FILE* fileTest = fopen(fileName.c_str(), "r")) {
fclose(fileTest);
} else {
cerr << "File " << fileName << " doesn't exist." << endl;
exit(1);
}
json data;
ifstream resultData(fileName);
if (resultData.is_open()) {
data = json::parse(resultData);
} else {
throw invalid_argument("Unable to open result file. [" + fileName + "]");
}
string title = "Best results for " + model;
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
// Body header
row = 3;
int col = 1;
writeString(row, 0, "", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader");
writeString(row, 2, "Score", "bodyHeader");
writeString(row, 3, "File", "bodyHeader");
writeString(row, 4, "Hyperparameters", "bodyHeader");
auto i = 0;
string hyperparameters;
int hypSize = 0;
for (auto const& item : data.items()) {
row++;
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
writeDouble(row, 2, item.value().at(0).get<double>(), "result");
writeString(row, 3, item.value().at(2).get<string>(), "text");
try {
hyperparameters = item.value().at(1).get<string>();
}
catch (const exception& err) {
stringstream oss;
oss << item.value().at(1);
hyperparameters = oss.str();
}
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, 4, hyperparameters, "text");
}
row++;
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
stringstream oss;
oss << "=sum(indirect(address(5, 3)):indirect(address(" << row << ", 3)))";
worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]);
// Set format
worksheet_freeze_panes(worksheet, 4, 2);
vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
BestResultsExcel::~BestResultsExcel()
{
@ -99,7 +168,7 @@ namespace platform {
int col = 1;
for (const auto& model : models) {
stringstream oss;
oss << "=sum(indirect(address(" << 5 << "," << col + 2 << ")):indirect(address(" << row - 1 << "," << col + 2 << ")))/" << datasets.size();
oss << "=sum(indirect(address(5, " << col + 2 << ")):indirect(address(" << row - 1 << "," << col + 2 << ")))/" << datasets.size();
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
}

View File

@ -12,11 +12,13 @@ namespace platform {
class BestResultsExcel : ExcelFile {
public:
BestResultsExcel(const string& score, const vector<string>& models, const vector<string>& datasets, const json& table, const map<string, map<string, float>>& ranks, bool friedman, double significance);
BestResultsExcel(const string& score, const vector<string>& datasets);
~BestResultsExcel();
void build();
void reportAll(const vector<string>& models, const json& table, const map<string, map<string, float>>& ranks, bool friedman, double significance);
void reportSingle(const string& model, const string& fileName);
string getFileName();
private:
void build();
void header(bool ranks);
void body(bool ranks);
void footer(bool ranks);

View File

@ -29,15 +29,24 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
catch (...) {
throw runtime_error("Number of folds must be an decimal number");
}});
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
string model, score;
bool build, report, friedman, excel;
double level;
try {
program.parse_args(argc, argv);
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
auto excel = program.get<bool>("excel");
auto level = program.get<double>("level");
model = program.get<string>("model");
score = program.get<string>("score");
build = program.get<bool>("build");
report = program.get<bool>("report");
friedman = program.get<bool>("friedman");
excel = program.get<bool>("excel");
level = program.get<double>("level");
if (model == "" || score == "") {
throw runtime_error("Model and score name must be supplied");
}
@ -46,11 +55,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
cerr << program;
exit(1);
}
if (excel && model != "any") {
cerr << "Excel ourput can only be used with all models" << endl;
cerr << program;
exit(1);
}
if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
cerr << program;
@ -62,19 +66,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
auto excel = program.get<bool>("excel");
auto level = program.get<double>("level");
// Generate report
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman, level);
if (build) {
if (model == "any") {
@ -88,7 +80,7 @@ int main(int argc, char** argv)
if (model == "any") {
results.reportAll(excel);
} else {
results.reportSingle();
results.reportSingle(excel);
}
}
return 0;