diff --git a/CMakeLists.txt b/CMakeLists.txt index 64b0304..c8200e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast") set(CMAKE_CXX_FLAGS_DEBUG " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g") # Options diff --git a/lib/libxlsxwriter b/lib/libxlsxwriter index f483e65..cf887d6 160000 --- a/lib/libxlsxwriter +++ b/lib/libxlsxwriter @@ -1 +1 @@ -Subproject commit f483e65f2e8364702c411ca54470482fe54666b2 +Subproject commit cf887d65ce31c74ce6fdc6f3c6163c1cc206e910 diff --git a/lib/mdlp b/lib/mdlp index c4e6c04..2db60e0 160000 --- a/lib/mdlp +++ b/lib/mdlp @@ -1 +1 @@ -Subproject commit c4e6c041fe7f769ec24c0a2bd66a5aff482fd630 +Subproject commit 2db60e007d70da876379373c53b6421f281daeac diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5b881e4..1e87c96 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,7 +2,7 @@ include_directories( ## Libs ${Platform_SOURCE_DIR}/lib/Files ${Platform_SOURCE_DIR}/lib/folding - ${Platform_SOURCE_DIR}/lib/mdlp + ${Platform_SOURCE_DIR}/lib/mdlp/src ${Platform_SOURCE_DIR}/lib/argparse/include ${Platform_SOURCE_DIR}/lib/json/include ${Platform_SOURCE_DIR}/lib/libxlsxwriter/include diff --git a/src/commands/b_main.cpp b/src/commands/b_main.cpp index 96a1054..32c2436 100644 --- a/src/commands/b_main.cpp +++ b/src/commands/b_main.cpp @@ -59,6 +59,7 @@ void manageArguments(argparse::ArgumentParser& program) smooth_arg.choices(choice); } program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true); + program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true); program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true); program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true); @@ -87,7 +88,7 @@ int main(int argc, char** argv) manageArguments(program); std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat; json hyperparameters_json; - bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files; + bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph; std::vector seeds; std::vector file_names; std::vector filesToTest; @@ -103,6 +104,7 @@ int main(int argc, char** argv) smooth_strat = program.get("smooth-strat"); stratified = program.get("stratified"); quiet = program.get("quiet"); + graph = program.get("graph"); n_folds = program.get("folds"); seeds = program.get>("seeds"); auto hyperparameters = program.get("hyperparameters"); @@ -200,7 +202,7 @@ int main(int argc, char** argv) } platform::Timer timer; timer.start(); - experiment.go(filesToTest, quiet, no_train_score, generate_fold_files); + experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph); experiment.setDuration(timer.getDuration()); if (!quiet) { // Classification report if only one dataset is tested @@ -209,6 +211,9 @@ int main(int argc, char** argv) if (saveResults) { experiment.saveResult(); } + if (graph) { + experiment.saveGraph(); + } std::cout << "Done!" << std::endl; return 0; } diff --git a/src/common/Dataset.cpp b/src/common/Dataset.cpp index b59ead4..26c0882 100644 --- a/src/common/Dataset.cpp +++ b/src/common/Dataset.cpp @@ -131,8 +131,7 @@ namespace platform { for (int i = 0; i < features.size(); ++i) { auto [max_value, idx] = torch::max(X_train.index({ i, "..." }), 0); states[features[i]] = std::vector(max_value.item() + 1); - auto item = states.at(features[i]); - iota(begin(item), end(item), 0); + iota(begin(states.at(features[i])), end(states.at(features[i])), 0); } auto [max_value, idx] = torch::max(y_train, 0); states[className] = std::vector(max_value.item() + 1); diff --git a/src/common/DiscretizationRegister.h b/src/common/DiscretizationRegister.h index 9a6a65a..7b4d817 100644 --- a/src/common/DiscretizationRegister.h +++ b/src/common/DiscretizationRegister.h @@ -11,4 +11,28 @@ static platform::RegistrarDiscretization registrarBU4("bin4u", [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::UNIFORM);}); static platform::RegistrarDiscretization registrarBQ4("bin4q", [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::QUANTILE);}); +static platform::RegistrarDiscretization registrarBU5("bin5u", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::UNIFORM);}); +static platform::RegistrarDiscretization registrarBQ5("bin5q", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::QUANTILE);}); +static platform::RegistrarDiscretization registrarBU6("bin6u", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::UNIFORM);}); +static platform::RegistrarDiscretization registrarBQ6("bin6q", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::QUANTILE);}); +static platform::RegistrarDiscretization registrarBU7("bin7u", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::UNIFORM);}); +static platform::RegistrarDiscretization registrarBQ7("bin7q", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::QUANTILE);}); +static platform::RegistrarDiscretization registrarBU8("bin8u", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::UNIFORM);}); +static platform::RegistrarDiscretization registrarBQ8("bin8q", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::QUANTILE);}); +static platform::RegistrarDiscretization registrarBU9("bin9u", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::UNIFORM);}); +static platform::RegistrarDiscretization registrarBQ9("bin9q", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::QUANTILE);}); +static platform::RegistrarDiscretization registrarBU10("bin10u", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::UNIFORM);}); +static platform::RegistrarDiscretization registrarBQ10("bin10q", + [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::QUANTILE);}); #endif \ No newline at end of file diff --git a/src/common/DotEnv.h b/src/common/DotEnv.h index e3cbbad..a576a75 100644 --- a/src/common/DotEnv.h +++ b/src/common/DotEnv.h @@ -29,7 +29,7 @@ namespace platform { {"framework", {"bulma", "bootstrap"}}, {"margin", {"0.1", "0.2", "0.3"}}, {"n_folds", {"5", "10"}}, - {"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q"}}, + {"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q", "bin5q", "bin5u", "bin6q", "bin6u", "bin7q", "bin7u", "bin8q", "bin8u", "bin9q", "bin9u", "bin10q", "bin10u"}}, {"smooth_strat", {"ORIGINAL", "LAPLACE", "CESTNIK"}}, {"platform", {"any"}}, {"model", {"any"}}, diff --git a/src/common/Paths.h b/src/common/Paths.h index 1544b1b..6c6e471 100644 --- a/src/common/Paths.h +++ b/src/common/Paths.h @@ -10,6 +10,7 @@ namespace platform { static std::string hiddenResults() { return "hidden_results/"; } static std::string excel() { return "excel/"; } static std::string grid() { return "grid/"; } + static std::string graphs() { return "graphs/"; } static std::string datasets() { auto env = platform::DotEnv(); diff --git a/src/main/Experiment.cpp b/src/main/Experiment.cpp index 5e9fab2..aa6f6f1 100644 --- a/src/main/Experiment.cpp +++ b/src/main/Experiment.cpp @@ -24,7 +24,24 @@ namespace platform { { std::cout << result.getJson().dump(4) << std::endl; } - void Experiment::go(std::vector filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files) + void Experiment::saveGraph() + { + std::cout << "Saving graphs..." << std::endl; + auto data = result.getJson(); + for (const auto& item : data["results"]) { + auto graphs = item["graph"]; + int i = 0; + for (const auto& graph : graphs) { + i++; + auto fileName = Paths::graphs() + result.getFilename() + "_graph_" + item["dataset"].get() + "_" + std::to_string(i) + ".dot"; + auto file = std::ofstream(fileName); + file << graph.get(); + file.close(); + std::cout << "Graph saved in " << fileName << std::endl; + } + } + } + void Experiment::go(std::vector filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph) { for (auto fileName : filesToProcess) { if (fileName.size() > max_name) @@ -48,7 +65,7 @@ namespace platform { for (auto fileName : filesToProcess) { if (!quiet) std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush; - cross_validation(fileName, quiet, no_train_score, generate_fold_files); + cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph); if (!quiet) std::cout << std::endl; } @@ -71,7 +88,7 @@ namespace platform { void showProgress(int fold, const std::string& color, const std::string& phase) { - std::string prefix = phase == "a" ? "" : "\b\b\b\b"; + std::string prefix = phase == "-" ? "" : "\b\b\b\b"; std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; } @@ -113,7 +130,7 @@ namespace platform { file << output.dump(4); file.close(); } - void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files) + void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph) { // // Load dataset and prepare data @@ -151,6 +168,7 @@ namespace platform { json confusion_matrices = json::array(); json confusion_matrices_train = json::array(); std::vector notes; + std::vector graphs; Timer train_timer, test_timer; int item = 0; bool first_seed = true; @@ -176,6 +194,8 @@ namespace platform { // for (int nfold = 0; nfold < nfolds; nfold++) { auto clf = Models::instance()->create(result.getModel()); + if (!quiet) + showProgress(nfold + 1, getColor(clf->getStatus()), "-"); setModelVersion(clf->getVersion()); auto valid = clf->getValidHyperparameters(); hyperparameters.check(valid, fileName); @@ -237,6 +257,13 @@ namespace platform { partial_result.addTimeTrain(train_time[item].item()); partial_result.addTimeTest(test_time[item].item()); item++; + if (graph) { + std::string result = ""; + for (const auto& line : clf->graph()) { + result += line + "\n"; + } + graphs.push_back(result); + } } if (!quiet) std::cout << "end. " << flush; @@ -245,6 +272,7 @@ namespace platform { // // Store result totals in Result // + partial_result.setGraph(graphs); partial_result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); partial_result.setScoreTestStd(torch::std(accuracy_test).item()).setScoreTrainStd(torch::std(accuracy_train).item()); partial_result.setTrainTime(torch::mean(train_time).item()).setTestTime(torch::mean(test_time).item()); diff --git a/src/main/Experiment.h b/src/main/Experiment.h index 26888ce..d036032 100644 --- a/src/main/Experiment.h +++ b/src/main/Experiment.h @@ -48,10 +48,11 @@ namespace platform { Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; } Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; } Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; } - void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files); - void go(std::vector filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files); + void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph); + void go(std::vector filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph); void saveResult(); void show(); + void saveGraph(); void report(bool classification_report = false); private: Result result; diff --git a/src/main/PartialResult.h b/src/main/PartialResult.h index 03dea41..d5e7667 100644 --- a/src/main/PartialResult.h +++ b/src/main/PartialResult.h @@ -15,6 +15,7 @@ namespace platform { data["times_train"] = json::array(); data["times_test"] = json::array(); data["notes"] = json::array(); + data["graph"] = json::array(); data["train_time"] = 0.0; data["train_time_std"] = 0.0; data["test_time"] = 0.0; @@ -27,6 +28,12 @@ namespace platform { data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end()); return *this; } + PartialResult& setGraph(const std::vector& graph) + { + json graph_ = graph; + data["graph"].insert(data["graph"].end(), graph_.begin(), graph_.end()); + return *this; + } PartialResult& setConfusionMatrices(const json& confusion_matrices) { data["confusion_matrices"] = confusion_matrices; return *this; } PartialResult& setConfusionMatricesTrain(const json& confusion_matrices) { data["confusion_matrices_train"] = confusion_matrices; return *this; } PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; } diff --git a/src/results/Result.h b/src/results/Result.h index 70d6c6b..e502692 100644 --- a/src/results/Result.h +++ b/src/results/Result.h @@ -28,6 +28,7 @@ namespace platform { std::string getModel() const { return data["model"].get(); }; std::string getPlatform() const { return data["platform"].get(); }; std::string getScoreName() const { return data["score_name"].get(); }; + bool isComplete() const { return complete; }; json getData() const { return data; } // Setters diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d6d9384..fce68a8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,7 +3,7 @@ if(ENABLE_TESTING) include_directories( ${Platform_SOURCE_DIR}/src ${Platform_SOURCE_DIR}/lib/argparse/include - ${Platform_SOURCE_DIR}/lib/mdlp + ${Platform_SOURCE_DIR}/lib/mdlp/src ${Platform_SOURCE_DIR}/lib/Files ${Platform_SOURCE_DIR}/lib/json/include ${Platform_SOURCE_DIR}/lib/folding diff --git a/tests/TestPlatform.cpp b/tests/TestPlatform.cpp index 7be2597..5450385 100644 --- a/tests/TestPlatform.cpp +++ b/tests/TestPlatform.cpp @@ -30,7 +30,7 @@ TEST_CASE("Test BayesNet version", "[BayesNet]") TEST_CASE("Test mdlp version", "[mdlp]") { std::string version = mdlp::CPPFImdlp::version(); - REQUIRE(version == "1.2.1"); + REQUIRE(version == "2.0.0"); } TEST_CASE("Test Arff version", "[Arff]") {