refactor_folders #1

Merged
rmontanana merged 3 commits from refactor_folders into main 2024-02-29 16:29:32 +00:00
60 changed files with 190 additions and 76 deletions

View File

@@ -5,7 +5,7 @@ diagrams:
type: class
glob:
- src/*.cc
- src/Command/*.cc
- src/modules/*.cc
using_namespace: platform
include:
namespaces:
@@ -17,7 +17,7 @@ diagrams:
sequence:
type: sequence
glob:
- src/Command/b_main.cc
- src/b_main.cc
combine_free_functions_into_file_participants: true
using_namespace:
- std

24
.vscode/launch.json vendored
View File

@@ -2,9 +2,9 @@
"version": "0.2.0",
"configurations": [
{
"name": "sample",
"type": "lldb",
"request": "launch",
"name": "sample",
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [
"-d",
@@ -14,14 +14,14 @@
"-s",
"271",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets/",
"${workspaceFolder}/../discretizbench/datasets/",
],
//"cwd": "${workspaceFolder}/build/sample/",
},
{
"name": "experimentPy",
"type": "lldb",
"request": "launch",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/b_main",
"args": [
"-m",
@@ -36,9 +36,9 @@
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"name": "gridsearch",
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/b_grid",
"args": [
"-m",
@@ -52,9 +52,9 @@
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"name": "experimentBayes",
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/b_main",
"args": [
"-m",
@@ -69,23 +69,23 @@
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
"name": "best",
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build_debug/src/b_best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
"--excel"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"name": "manage",
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build_debug/src/b_manage",
"args": [
"-n",
@@ -94,18 +94,20 @@
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"name": "list",
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build_debug/src/b_list",
"args": [],
"args": [
"--excel"
],
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"name": "test",
"type": "lldb",
"request": "launch",
"name": "test",
"program": "${workspaceFolder}/build_debug/tests/unit_tests",
"args": [
"-c=\"Metrics Test\"",

View File

@@ -78,7 +78,6 @@ message("XLSXWRITER_LIB=${XLSXWRITER_LIB}")
# Subdirectories
# --------------
set(Platform_MODULES ${Platform_SOURCE_DIR}/src/modules/)
add_subdirectory(config)
add_subdirectory(src)
add_subdirectory(sample)

View File

@@ -96,22 +96,6 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
done
@echo ">>> Done";
opt = ""
testp: ## Run platform tests (opt="-s") to verbose output the tests, (opt="-c='Stratified Fold Test'") to run only that section
@echo ">>> Running Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_platform $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_platform ]; then cd $(f_debug)/tests ; ./unit_tests_platform $(opt) ; fi ;
@echo ">>> Done";
opt = ""
testb: ## Run Platform tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_bayesnet $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_bayesnet ]; then cd $(f_debug)/tests ; ./unit_tests_bayesnet $(opt) ; fi ;
@echo ">>> Done";
coverage: ## Run tests and generate coverage report (build/index.html)
@echo ">>> Building tests with coverage..."
@$(MAKE) test

View File

@@ -1,5 +1,6 @@
include_directories(
${Platform_SOURCE_DIR}/src/modules
${Platform_SOURCE_DIR}/src/common
${Platform_SOURCE_DIR}/src/main
${Platform_SOURCE_DIR}/lib/PyClassifiers/src
${Python3_INCLUDE_DIRS}
${Platform_SOURCE_DIR}/lib/Files
@@ -10,5 +11,5 @@ include_directories(
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/json/include
${CMAKE_BINARY_DIR}/configured_files/include
)
add_executable(PlatformSample sample.cc ${Platform_MODULES}/Models.cc)
add_executable(PlatformSample sample.cc ${Platform_SOURCE_DIR}/src/main/Models.cc)
target_link_libraries(PlatformSample PyClassifiers ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,10 +1,10 @@
include_directories(
## Libs
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/src
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/folding
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/mdlp
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/json/include
${Platform_SOURCE_DIR}/lib/PyClassifiers/src
${Platform_MODULES}
${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/mdlp
${Platform_SOURCE_DIR}/lib/argparse/include
@@ -13,24 +13,41 @@ include_directories(
${Python3_INCLUDE_DIRS}
${MPI_CXX_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/configured_files/include
## Platform
${Platform_SOURCE_DIR}/src/common
${Platform_SOURCE_DIR}/src/best
${Platform_SOURCE_DIR}/src/grid
${Platform_SOURCE_DIR}/src/main
${Platform_SOURCE_DIR}/src/manage
${Platform_SOURCE_DIR}/src/reports
)
set(best_sources BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
list(TRANSFORM best_sources PREPEND ${Platform_MODULES})
add_executable(b_best b_best.cc ${best_sources})
set(grid_sources GridSearch.cc GridData.cc HyperParameters.cc Datasets.cc Dataset.cc Models.cc)
list(TRANSFORM grid_sources PREPEND ${Platform_MODULES})
add_executable(b_grid b_grid.cc ${grid_sources})
add_executable(b_list b_list.cc ${Platform_MODULES}Datasets.cc ${Platform_MODULES}Dataset.cc)
set(main_sources Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc Result.cc)
list(TRANSFORM main_sources PREPEND ${Platform_MODULES})
add_executable(b_main b_main.cc ${main_sources})
set(manage_sources Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
list(TRANSFORM manage_sources PREPEND ${Platform_MODULES})
add_executable(b_manage b_manage.cc ${manage_sources})
# b_best
set(best_sources b_best.cc BestResults.cc Statistics.cc BestResultsExcel.cc)
list(TRANSFORM best_sources PREPEND best/)
add_executable(b_best ${best_sources} main/Result.cc reports/ReportExcel.cc reports/ReportBase.cc reports/ExcelFile.cc common/Datasets.cc common/Dataset.cc)
target_link_libraries(b_best Boost::boost "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
# b_grid
set(grid_sources b_grid.cc GridSearch.cc GridData.cc)
list(TRANSFORM grid_sources PREPEND grid/)
add_executable(b_grid ${grid_sources} main/HyperParameters.cc main/Models.cc common/Datasets.cc common/Dataset.cc)
target_link_libraries(b_grid PyClassifiers ${MPI_CXX_LIBRARIES} ArffFiles)
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_main PyClassifiers BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
# b_list
set(list_sources b_list.cc DatasetsExcel.cc)
list(TRANSFORM list_sources PREPEND list/)
add_executable(b_list ${list_sources} common/Datasets.cc common/Dataset.cc reports/ReportExcel.cc reports/ExcelFile.cc reports/ReportBase.cc)
target_link_libraries(b_list "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
# b_main
set(main_sources b_main.cc Experiment.cc Models.cc HyperParameters.cc)
list(TRANSFORM main_sources PREPEND main/)
add_executable(b_main ${main_sources} common/Datasets.cc common/Dataset.cc reports/ReportConsole.cc reports/ReportBase.cc main/Result.cc)
target_link_libraries(b_main PyClassifiers BayesNet ArffFiles mdlp)
# b_manage
set(manage_sources b_manage.cc ManageResults.cc CommandParser.cc Results.cc)
list(TRANSFORM manage_sources PREPEND manage/)
add_executable(b_manage ${manage_sources} main/Result.cc reports/ReportConsole.cc reports/ReportExcel.cc reports/ReportBase.cc reports/ExcelFile.cc common/Datasets.cc common/Dataset.cc)
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)

View File

@@ -302,9 +302,9 @@ namespace platform {
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel(score, datasets);
excel.reportSingle(model, path + bestResultFile());
messageExcelFile(excel.getFileName());
BestResultsExcel excel_report(score, datasets);
excel_report.reportSingle(model, path + bestResultFile());
messageExcelFile(excel_report.getFileName());
}
}
void BestResults::reportAll(bool excel)

View File

@@ -32,7 +32,8 @@ namespace platform {
}
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{
workbook = workbook_new((Paths::excel() + fileName).c_str());
file_name = "BestResults.xlsx";
workbook = workbook_new(getFileName().c_str());
setProperties("Best Results");
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
datasetNameSize = std::max(datasetNameSize, maxDatasetName);
@@ -170,10 +171,7 @@ namespace platform {
doFriedman();
}
}
std::string BestResultsExcel::getFileName()
{
return Paths::excel() + fileName;
}
void BestResultsExcel::header(bool ranks)
{
row = 0;

View File

@@ -9,13 +9,12 @@ using json = nlohmann::json;
namespace platform {
class BestResultsExcel : ExcelFile {
class BestResultsExcel : public ExcelFile {
public:
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
~BestResultsExcel();
void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
void reportSingle(const std::string& model, const std::string& fileName);
std::string getFileName();
private:
void build();
void header(bool ranks);
@@ -24,7 +23,6 @@ namespace platform {
void formatColumns();
void doFriedman();
void addConditionalFormat(std::string formula);
const std::string fileName = "BestResults.xlsx";
std::string score;
std::vector<std::string> models;
std::vector<std::string> datasets;

View File

@@ -65,5 +65,6 @@ int main(int argc, char** argv)
std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;
results.reportSingle(excel);
}
std::cout << Colors::RESET();
return 0;
}

55
src/list/DatasetsExcel.cc Normal file
View File

@@ -0,0 +1,55 @@
#include <sstream>
#include "DatasetsExcel.h"
#include "Paths.h"
namespace platform {
// Builds the "datasets.xlsx" workbook that report() later fills in.
//
// data: JSON object with the per-dataset information to be reported; it is
//       copied into the `data` member, so the caller's object is not retained.
//
// The base class is listed first in the initializer list because C++ always
// initialises bases before members, whatever order is written; listing
// `data` first was misleading and triggers -Wreorder.
DatasetsExcel::DatasetsExcel(json& data) : ExcelFile(), data(data)
{
    // file_name must be set before getFileName() is called: that accessor
    // returns Paths::excel() + file_name.
    file_name = "datasets.xlsx";
    workbook = workbook_new(getFileName().c_str());
    // The workbook has to exist before these two calls -- presumably both
    // register formats/properties on it; keep this order.
    createFormats();
    setProperties("Datasets");
}
// Closes the workbook created in the constructor. Per the libxlsxwriter
// documentation, workbook_close() is the call that writes the file and
// releases the workbook, so the report only exists on disk after this runs.
// NOTE(review): the lxw_error returned by workbook_close() is ignored here.
DatasetsExcel::~DatasetsExcel()
{
workbook_close(workbook);
}
// Writes the "Datasets" worksheet: a merged title on row 0, the column
// titles on row 2 and then one row per entry of `data`, reading the
// "samples", "features", "classes" and "balance" fields of each entry.
// Column widths are applied once with the minimum sizes and again at the
// end, when the longest dataset name is known.
void DatasetsExcel::report()
{
    // Minimum widths of the dataset-name and balance columns
    int datasetNameSize = 25;
    int balanceSize = 75;

    worksheet = workbook_add_worksheet(workbook, "Datasets");
    worksheet_merge_range(worksheet, 0, 0, 0, 5, "Datasets", styles["headerFirst"]);
    formatColumns(datasetNameSize, balanceSize);

    // Body header
    row = 2;
    const char* const titles[] = { "", "Dataset", "Samples", "Features", "Classes", "Balance" };
    int headerCol = 0;
    for (const char* title : titles) {
        writeString(row, headerCol, title, "bodyHeader");
        ++headerCol;
    }

    // Body: one row per dataset, numbered from 0
    int index = 0;
    for (auto& item : data.items()) {
        const auto& key = item.key();
        auto& value = item.value();
        ++row;
        // Track the longest dataset name so the column can be widened afterwards
        const int nameLength = static_cast<int>(key.size());
        if (nameLength > datasetNameSize) {
            datasetNameSize = nameLength;
        }
        writeInt(row, 0, index, "ints");
        ++index;
        writeString(row, 1, key.c_str(), "text");
        writeInt(row, 2, value["samples"], "ints");
        writeInt(row, 3, value["features"], "ints");
        writeInt(row, 4, value["classes"], "ints");
        writeString(row, 5, value["balance"].get<std::string>().c_str(), "text");
    }
    ++row;

    // Re-apply the widths now that the final dataset-name width is known
    formatColumns(datasetNameSize, balanceSize);
}
// Freezes the panes of the current worksheet and sets the width of its six
// columns.
//
// dataset: width used for column 1 (dataset name)
// balance: width used for column 5 (balance)
// The remaining columns use fixed widths (5 for column 0, 10 for columns 2-4).
void DatasetsExcel::formatColumns(int dataset, int balance)
{
    // NOTE(review): report() writes the column titles on row 2 and the first
    // dataset on row 3; confirm that splitting at row 4 / column 2 is the
    // intended freeze position.
    worksheet_freeze_panes(worksheet, 4, 2);

    const std::vector<int> widths = { 5, dataset, 10, 10, 10, balance };
    int column = 0;
    for (const int width : widths) {
        worksheet_set_column(worksheet, column, column, width, NULL);
        ++column;
    }
}
}

23
src/list/DatasetsExcel.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef DATASETS_EXCEL_H
#define DATASETS_EXCEL_H
#include "ExcelFile.h"
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
// Excel report with one row per dataset, built on top of ExcelFile.
// The destination path is available through the inherited getFileName().
class DatasetsExcel : public ExcelFile {
public:
// data: JSON object keyed by dataset name; report() reads the "samples",
// "features", "classes" and "balance" fields of each entry.
explicit DatasetsExcel(json& data);
// Closes the workbook.
~DatasetsExcel();
// Creates the "Datasets" worksheet and writes the header and one row per dataset.
void report();
private:
// Freezes the panes and sets the column widths; `dataset` and `balance`
// are the widths of the dataset-name and balance columns.
void formatColumns(int dataset, int balance);
// Copy of the JSON passed to the constructor (stored by value).
json data;
};
};
}
#endif //DATASETS_EXCEL_H

View File

@@ -1,8 +1,12 @@
#include <iostream>
#include <locale>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "Paths.h"
#include "Colors.h"
#include "Datasets.h"
#include "DatasetsExcel.h"
#include "config.h"
const int BALANCE_LENGTH = 75;
@@ -12,7 +16,7 @@ struct separated : numpunct<char> {
std::string do_grouping() const { return "\03"; }
};
void outputBalance(const std::string& balance)
std::string outputBalance(const std::string& balance)
{
auto temp = std::string(balance);
while (temp.size() > BALANCE_LENGTH - 1) {
@@ -21,12 +25,19 @@ void outputBalance(const std::string& balance)
std::cout << setw(52) << " ";
temp = temp.substr(BALANCE_LENGTH);
}
std::cout << temp << std::endl;
return temp;
}
int main(int argc, char** argv)
{
auto data = platform::Datasets(false, platform::Paths::datasets());
auto datasets = platform::Datasets(false, platform::Paths::datasets());
argparse::ArgumentParser program("b_list", { project_version.begin(), project_version.end() });
program.add_argument("--excel")
.help("Output in Excel format")
.default_value(false)
.implicit_value(true);
program.parse_args(argc, argv);
auto excel = program.get<bool>("--excel");
locale mylocale(std::cout.getloc(), new separated);
locale::global(mylocale);
std::cout.imbue(mylocale);
@@ -34,23 +45,36 @@ int main(int argc, char** argv)
std::string balanceBars = std::string(BALANCE_LENGTH, '=');
std::cout << "=== ============================== ====== ===== === " << balanceBars << std::endl;
int num = 0;
for (const auto& dataset : data.getNames()) {
json data;
for (const auto& dataset : datasets.getNames()) {
auto color = num % 2 ? Colors::CYAN() : Colors::BLUE();
std::cout << color << setw(3) << right << num++ << " ";
std::cout << setw(30) << left << dataset << " ";
data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset);
datasets.loadDataset(dataset);
auto nSamples = datasets.getNSamples(dataset);
std::cout << setw(6) << right << nSamples << " ";
std::cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
std::cout << setw(3) << right << data.getNClasses(dataset) << " ";
std::cout << setw(5) << right << datasets.getFeatures(dataset).size() << " ";
std::cout << setw(3) << right << datasets.getNClasses(dataset) << " ";
std::stringstream oss;
std::string sep = "";
for (auto number : data.getClassesCounts(dataset)) {
for (auto number : datasets.getClassesCounts(dataset)) {
oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / ";
}
outputBalance(oss.str());
auto balance = outputBalance(oss.str());
std::cout << balance << std::endl;
// Store data for Excel report
data[dataset] = json::object();
data[dataset]["samples"] = nSamples;
data[dataset]["features"] = datasets.getFeatures(dataset).size();
data[dataset]["classes"] = datasets.getNClasses(dataset);
data[dataset]["balance"] = oss.str();
}
std::cout << Colors::RESET() << std::endl;
if (excel) {
auto report = platform::DatasetsExcel(data);
report.report();
std::cout << "Output saved in " << report.getFileName() << std::endl;
}
return 0;
}

View File

@@ -164,7 +164,7 @@ namespace platform {
if (indexList) {
std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1);
} else {
std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).getJson()["results"].size() - 1);
std::tie(option, subIndex) = parser.parse(Colors::CYAN(), listOptions, 'r', results.at(index).getJson()["results"].size() - 1);
}
switch (option) {
case 'q':

View File

@@ -1,4 +1,5 @@
#include "ExcelFile.h"
#include "Paths.h"
namespace platform {
ExcelFile::ExcelFile()
@@ -26,6 +27,10 @@ namespace platform {
{
return workbook;
}
std::string ExcelFile::getFileName()
{
return Paths::excel() + file_name;
}
void ExcelFile::setProperties(std::string title)
{
char line[title.size() + 1];
@@ -82,11 +87,13 @@ namespace platform {
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_text_wrap(style);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
@@ -96,18 +103,22 @@ namespace platform {
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}

View File

@@ -19,6 +19,7 @@ namespace platform {
ExcelFile(lxw_workbook* workbook);
ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet);
lxw_workbook* getWorkbook();
std::string getFileName();
protected:
void setProperties(std::string title);
void writeString(int row, int col, const std::string& text, const std::string& style = "");
@@ -36,6 +37,7 @@ namespace platform {
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
std::string file_name;
private:
void setDefault();
};

View File

@@ -94,10 +94,10 @@ namespace platform {
std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
std::cout << std::string(MAXL, '*') << std::endl;
} else {
footer(totalScore);
}
std::cout << std::string(MAXL, '*') << Colors::RESET() << std::endl;
}
void ReportConsole::showSummary()
{
@@ -124,6 +124,5 @@ namespace platform {
if (!getExistBestFile() && compare) {
std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
}
std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET();
}
}

View File

@@ -9,6 +9,7 @@ namespace platform {
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet)
{
createFile();
createFormats();
}
void ReportExcel::formatColumns()
@@ -49,7 +50,6 @@ namespace platform {
createWorksheet();
}
setProperties(data["title"].get<std::string>());
createFormats();
formatColumns();
}