From 89c4613591ec7cb66b70a8577d5350db9415a62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 18 Nov 2023 11:56:10 +0100 Subject: [PATCH] Implement hyperparameters with json file --- src/BayesNet/BaseClassifier.h | 2 +- src/BayesNet/BoostAODE.cc | 2 +- src/BayesNet/BoostAODE.h | 4 ++-- src/BayesNet/Classifier.cc | 4 ++-- src/BayesNet/Classifier.h | 4 ++-- src/BayesNet/KDB.cc | 2 +- src/BayesNet/KDB.h | 4 ++-- src/BayesNet/SPODE.h | 2 +- src/BayesNet/TAN.h | 2 +- src/Platform/CMakeLists.txt | 2 +- src/Platform/Experiment.cc | 7 +++---- src/Platform/Experiment.h | 23 ++++++++++----------- src/Platform/HyperParameters.cc | 33 +++++++++++++++++++++++++++++++ src/Platform/HyperParameters.h | 22 +++++++++++++++++++++ src/Platform/b_main.cc | 23 ++++++++++++++++----- src/PyClassifiers/ODTE.cc | 2 +- src/PyClassifiers/ODTE.h | 2 +- src/PyClassifiers/PyClassifier.cc | 2 +- src/PyClassifiers/PyClassifier.h | 2 +- src/PyClassifiers/RandomForest.cc | 2 +- src/PyClassifiers/RandomForest.h | 2 +- src/PyClassifiers/STree.cc | 2 +- src/PyClassifiers/STree.h | 2 +- src/PyClassifiers/SVC.cc | 2 +- src/PyClassifiers/SVC.h | 2 +- 25 files changed, 112 insertions(+), 44 deletions(-) create mode 100644 src/Platform/HyperParameters.cc create mode 100644 src/Platform/HyperParameters.h diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index ca1a5b6..ffbe5f2 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -29,7 +29,7 @@ namespace bayesnet { virtual std::string getVersion() = 0; std::vector virtual topological_order() = 0; void virtual dump_cpt()const = 0; - virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0; + virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index 78b7a0d..059fec4 100644 --- a/src/BayesNet/BoostAODE.cc +++ 
b/src/BayesNet/BoostAODE.cc @@ -43,7 +43,7 @@ namespace bayesnet { y_train = y_; } } - void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) + void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid const std::vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" }; diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index cc45cba..670c696 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -8,9 +8,9 @@ namespace bayesnet { class BoostAODE : public Ensemble { public: BoostAODE(); - virtual ~BoostAODE() {}; + virtual ~BoostAODE() = default; std::vector graph(const std::string& title = "BoostAODE") const override; - void setHyperparameters(nlohmann::json& hyperparameters) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; protected: void buildModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override; diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index 53db344..5d46cb0 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -153,7 +153,7 @@ namespace bayesnet { { model.dump_cpt(); } - void Classifier::checkHyperparameters(const std::vector& validKeys, nlohmann::json& hyperparameters) + void Classifier::checkHyperparameters(const std::vector& validKeys, const nlohmann::json& hyperparameters) { for (const auto& item : hyperparameters.items()) { if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) { @@ -161,7 +161,7 @@ namespace bayesnet { } } } - void Classifier::setHyperparameters(nlohmann::json& hyperparameters) + void Classifier::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid, default is no hyperparameters const std::vector validKeys = { }; diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index 
d4a9c42..f187a0e 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -22,7 +22,7 @@ namespace bayesnet { void checkFitParameters(); virtual void buildModel(const torch::Tensor& weights) = 0; void trainModel(const torch::Tensor& weights) override; - void checkHyperparameters(const std::vector& validKeys, nlohmann::json& hyperparameters); + void checkHyperparameters(const std::vector& validKeys, const nlohmann::json& hyperparameters); void buildDataset(torch::Tensor& y); public: Classifier(Network model); @@ -44,7 +44,7 @@ namespace bayesnet { std::vector show() const override; std::vector topological_order() override; void dump_cpt() const override; - void setHyperparameters(nlohmann::json& hyperparameters) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; }; } #endif diff --git a/src/BayesNet/KDB.cc b/src/BayesNet/KDB.cc index 3c344a7..a6ed0c8 100644 --- a/src/BayesNet/KDB.cc +++ b/src/BayesNet/KDB.cc @@ -2,7 +2,7 @@ namespace bayesnet { KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} - void KDB::setHyperparameters(nlohmann::json& hyperparameters) + void KDB::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid const std::vector validKeys = { "k", "theta" }; diff --git a/src/BayesNet/KDB.h b/src/BayesNet/KDB.h index 7dbc8f4..95fb315 100644 --- a/src/BayesNet/KDB.h +++ b/src/BayesNet/KDB.h @@ -13,8 +13,8 @@ namespace bayesnet { void buildModel(const torch::Tensor& weights) override; public: explicit KDB(int k, float theta = 0.03); - virtual ~KDB() {}; - void setHyperparameters(nlohmann::json& hyperparameters) override; + virtual ~KDB() = default; + void setHyperparameters(const nlohmann::json& hyperparameters) override; std::vector graph(const std::string& name = "KDB") const override; }; } diff --git a/src/BayesNet/SPODE.h b/src/BayesNet/SPODE.h index 6621263..96b7834 100644 --- a/src/BayesNet/SPODE.h +++ b/src/BayesNet/SPODE.h @@ -10,7 
+10,7 @@ namespace bayesnet { void buildModel(const torch::Tensor& weights) override; public: explicit SPODE(int root); - virtual ~SPODE() {}; + virtual ~SPODE() = default; std::vector graph(const std::string& name = "SPODE") const override; }; } diff --git a/src/BayesNet/TAN.h b/src/BayesNet/TAN.h index afbf2eb..ecb803d 100644 --- a/src/BayesNet/TAN.h +++ b/src/BayesNet/TAN.h @@ -8,7 +8,7 @@ namespace bayesnet { void buildModel(const torch::Tensor& weights) override; public: TAN(); - virtual ~TAN() {}; + virtual ~TAN() = default; std::vector graph(const std::string& name = "TAN") const override; }; } diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 24311cf..fba1656 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) include_directories(${Python3_INCLUDE_DIRS}) -add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc) +add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc) add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) add_executable(b_list b_list.cc Datasets.cc Dataset.cc) add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index d683815..81d9755 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -26,7 +26,6 @@ namespace platform { oss << std::put_time(timeinfo, "%H:%M:%S"); return oss.str(); } - Experiment::Experiment() : hyperparameters(json::parse("{}")) {} std::string Experiment::get_file_name() { 
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; @@ -148,7 +147,7 @@ namespace platform { auto result = Result(); auto [values, counts] = at::_unique(y); result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0)); - result.setHyperparameters(hyperparameters); + result.setHyperparameters(hyperparameters.get(fileName)); // Initialize results std::vectors int nResults = nfolds * static_cast(randomSeeds.size()); auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); @@ -171,8 +170,8 @@ namespace platform { for (int nfold = 0; nfold < nfolds; nfold++) { auto clf = Models::instance()->create(model); setModelVersion(clf->getVersion()); - if (hyperparameters.size() != 0) { - clf->setHyperparameters(hyperparameters); + if (hyperparameters.notEmpty(fileName)) { + clf->setHyperparameters(hyperparameters.get(fileName)); } // Split train - test dataset train_timer.start(); diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index 00438b6..c00d7ff 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -6,6 +6,7 @@ #include #include "Folding.h" #include "BaseClassifier.h" +#include "HyperParameters.h" #include "TAN.h" #include "KDB.h" #include "AODE.h" @@ -80,17 +81,8 @@ namespace platform { const std::vector& getTimesTest() const { return times_test; } }; class Experiment { - private: - std::string title, model, platform, score_name, model_version, language_version, language; - bool discretized{ false }, stratified{ false }; - std::vector results; - std::vector randomSeeds; - json hyperparameters = "{}"; - int nfolds{ 0 }; - float duration{ 0 }; - json build_json(); public: - Experiment(); + Experiment() = default; Experiment& setTitle(const std::string& title) { this->title = title; return *this; } Experiment& setModel(const std::string& model) { this->model = model; return *this; } Experiment& 
setPlatform(const std::string& platform) { this->platform = platform; return *this; } @@ -104,13 +96,22 @@ namespace platform { Experiment& addResult(Result result) { results.push_back(result); return *this; } Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; } Experiment& setDuration(float duration) { this->duration = duration; return *this; } - Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } + Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; } std::string get_file_name(); void save(const std::string& path); void cross_validation(const std::string& fileName, bool quiet); void go(std::vector filesToProcess, bool quiet); void show(); void report(); + private: + std::string title, model, platform, score_name, model_version, language_version, language; + bool discretized{ false }, stratified{ false }; + std::vector results; + std::vector randomSeeds; + HyperParameters hyperparameters; + int nfolds{ 0 }; + float duration{ 0 }; + json build_json(); }; } #endif \ No newline at end of file diff --git a/src/Platform/HyperParameters.cc b/src/Platform/HyperParameters.cc new file mode 100644 index 0000000..452e0a9 --- /dev/null +++ b/src/Platform/HyperParameters.cc @@ -0,0 +1,33 @@ +#include "HyperParameters.h" +#include + +namespace platform { + HyperParameters::HyperParameters(const std::vector& datasets, const json& hyperparameters_) + { + // Initialize all datasets with the given hyperparameters + for (const auto& item : datasets) { + hyperparameters[item] = hyperparameters_; + } + } + HyperParameters::HyperParameters(const std::vector& datasets, const std::string& hyperparameters_file) + { + // Check if file exists + std::ifstream file(hyperparameters_file); + if (!file.is_open()) { + throw std::runtime_error("File " + hyperparameters_file + " not found"); + } + // Check if file 
is a json + json input_hyperparameters = json::parse(file); + // Check if hyperparameters are valid + for (const auto& dataset : datasets) { + if (!input_hyperparameters.contains(dataset)) { + throw std::runtime_error("Dataset " + dataset + " not found in hyperparameters file"); + } + hyperparameters[dataset] = input_hyperparameters[dataset]; + } + } + json HyperParameters::get(const std::string& key) const + { + return hyperparameters.at(key); /* std::map::at throws std::out_of_range when key was never registered */ + } +} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/HyperParameters.h b/src/Platform/HyperParameters.h new file mode 100644 index 0000000..da22fff --- /dev/null +++ b/src/Platform/HyperParameters.h @@ -0,0 +1,22 @@ +#ifndef HYPERPARAMETERS_H +#define HYPERPARAMETERS_H +#include +#include +#include +#include + +namespace platform { + using json = nlohmann::json; + class HyperParameters { + public: + HyperParameters() = default; + explicit HyperParameters(const std::vector& datasets, const json& hyperparameters_); + explicit HyperParameters(const std::vector& datasets, const std::string& hyperparameters_file); + ~HyperParameters() = default; + bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); } /* json::empty() is true for both null and {}, so the default "{}" counts as empty; a comparison against json() (null) would not */ + json get(const std::string& key) const; /* returns a copy of the hyperparameters stored for key; const so it is callable on a const HyperParameters */ + private: + std::map hyperparameters; + }; +} /* namespace platform */ +#endif /* HYPERPARAMETERS_H */ \ No newline at end of file diff --git a/src/Platform/b_main.cc b/src/Platform/b_main.cc index 25f206a..bf2c703 100644 --- a/src/Platform/b_main.cc +++ b/src/Platform/b_main.cc @@ -16,7 +16,9 @@ argparse::ArgumentParser manageArguments() auto env = platform::DotEnv(); argparse::ArgumentParser program("main"); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); - program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment"); + program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); + 
program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ + "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); program.add_argument("-m", "--model") .help("Model to use " + platform::Models::instance()->tostring()) .action([](const std::string& value) { @@ -53,7 +55,7 @@ argparse::ArgumentParser manageArguments() int main(int argc, char** argv) { - std::string file_name, model_name, title; + std::string file_name, model_name, title, hyperparameters_file; json hyperparameters_json; bool discretize_dataset, stratified, saveResults, quiet; std::vector seeds; @@ -71,6 +73,10 @@ int main(int argc, char** argv) seeds = program.get>("seeds"); auto hyperparameters = program.get("hyperparameters"); hyperparameters_json = json::parse(hyperparameters); + hyperparameters_file = program.get("hyper-file"); + if (hyperparameters_file != "" && hyperparameters != "{}") { + throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); + } title = program.get("title"); if (title == "" && file_name == "") { throw runtime_error("title is mandatory if dataset is not provided"); @@ -96,15 +102,22 @@ int main(int argc, char** argv) filesToTest = datasets.getNames(); saveResults = true; } + platform::HyperParameters test_hyperparams; + if (hyperparameters_file != "") { + test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file); + } else { + test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); + } + /* - * Begin Processing - */ + * Begin Processing + */ auto env = platform::DotEnv(); auto experiment = platform::Experiment(); experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3"); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); - 
experiment.setHyperparameters(hyperparameters_json); + experiment.setHyperparameters(test_hyperparams); for (auto seed : seeds) { experiment.addRandomSeed(seed); } diff --git a/src/PyClassifiers/ODTE.cc b/src/PyClassifiers/ODTE.cc index 1083699..f168f43 100644 --- a/src/PyClassifiers/ODTE.cc +++ b/src/PyClassifiers/ODTE.cc @@ -5,7 +5,7 @@ namespace pywrap { { return callMethodString("graph"); } - void ODTE::setHyperparameters(nlohmann::json& hyperparameters) + void ODTE::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid const std::vector validKeys = { "n_jobs", "n_estimators", "random_state" }; diff --git a/src/PyClassifiers/ODTE.h b/src/PyClassifiers/ODTE.h index a3bbd05..1c90951 100644 --- a/src/PyClassifiers/ODTE.h +++ b/src/PyClassifiers/ODTE.h @@ -9,7 +9,7 @@ namespace pywrap { ODTE() : PyClassifier("odte", "Odte") {}; ~ODTE() = default; std::string graph(); - void setHyperparameters(nlohmann::json& hyperparameters) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; }; } /* namespace pywrap */ #endif /* ODTE_H */ \ No newline at end of file diff --git a/src/PyClassifiers/PyClassifier.cc b/src/PyClassifiers/PyClassifier.cc index 9e9f75a..c15b9b7 100644 --- a/src/PyClassifiers/PyClassifier.cc +++ b/src/PyClassifiers/PyClassifier.cc @@ -81,7 +81,7 @@ namespace pywrap { float result = pyWrap->score(id, Xp, yp); return result; } - void PyClassifier::setHyperparameters(nlohmann::json& hyperparameters) + void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid, default is no hyperparameters const std::vector validKeys = { }; diff --git a/src/PyClassifiers/PyClassifier.h b/src/PyClassifiers/PyClassifier.h index f55eb25..7fe460a 100644 --- a/src/PyClassifiers/PyClassifier.h +++ b/src/PyClassifiers/PyClassifier.h @@ -27,7 +27,6 @@ namespace pywrap { std::vector predict(std::vector>& X) override { return std::vector(); }; float 
score(std::vector>& X, std::vector& y) override { return 0.0; }; float score(torch::Tensor& X, torch::Tensor& y) override; - void setHyperparameters(nlohmann::json& hyperparameters) override; std::string version(); std::string callMethodString(const std::string& method); std::string getVersion() override { return this->version(); }; @@ -39,6 +38,7 @@ namespace pywrap { bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; }; std::vector topological_order() override { return std::vector(); } void dump_cpt() const override {}; + void setHyperparameters(const nlohmann::json& hyperparameters) override; protected: void checkHyperparameters(const std::vector& validKeys, const nlohmann::json& hyperparameters); nlohmann::json hyperparameters; diff --git a/src/PyClassifiers/RandomForest.cc b/src/PyClassifiers/RandomForest.cc index 3ba2424..64e33ec 100644 --- a/src/PyClassifiers/RandomForest.cc +++ b/src/PyClassifiers/RandomForest.cc @@ -1,7 +1,7 @@ #include "RandomForest.h" namespace pywrap { - void RandomForest::setHyperparameters(nlohmann::json& hyperparameters) + void RandomForest::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid const std::vector validKeys = { "n_estimators", "n_jobs", "random_state" }; diff --git a/src/PyClassifiers/RandomForest.h b/src/PyClassifiers/RandomForest.h index a6b2162..001aef0 100644 --- a/src/PyClassifiers/RandomForest.h +++ b/src/PyClassifiers/RandomForest.h @@ -7,7 +7,7 @@ namespace pywrap { public: RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier", true) {}; ~RandomForest() = default; - void setHyperparameters(nlohmann::json& hyperparameters) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; }; } /* namespace pywrap */ #endif /* RANDOMFOREST_H */ \ No newline at end of file diff --git a/src/PyClassifiers/STree.cc b/src/PyClassifiers/STree.cc index db864bf..9e43e5e 100644 --- a/src/PyClassifiers/STree.cc +++ 
b/src/PyClassifiers/STree.cc @@ -5,7 +5,7 @@ namespace pywrap { { return callMethodString("graph"); } - void STree::setHyperparameters(nlohmann::json& hyperparameters) + void STree::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid const std::vector validKeys = { "C", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy" }; diff --git a/src/PyClassifiers/STree.h b/src/PyClassifiers/STree.h index 4691fc0..a803e71 100644 --- a/src/PyClassifiers/STree.h +++ b/src/PyClassifiers/STree.h @@ -9,7 +9,7 @@ namespace pywrap { STree() : PyClassifier("stree", "Stree") {}; ~STree() = default; std::string graph(); - void setHyperparameters(nlohmann::json& hyperparameters) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; }; } /* namespace pywrap */ #endif /* STREE_H */ \ No newline at end of file diff --git a/src/PyClassifiers/SVC.cc b/src/PyClassifiers/SVC.cc index 2ed9c3a..6f0f725 100644 --- a/src/PyClassifiers/SVC.cc +++ b/src/PyClassifiers/SVC.cc @@ -1,7 +1,7 @@ #include "SVC.h" namespace pywrap { - void SVC::setHyperparameters(nlohmann::json& hyperparameters) + void SVC::setHyperparameters(const nlohmann::json& hyperparameters) { // Check if hyperparameters are valid const std::vector validKeys = { "C", "gamma", "kernel", "random_state" }; diff --git a/src/PyClassifiers/SVC.h b/src/PyClassifiers/SVC.h index d62bbbc..fc5a9ec 100644 --- a/src/PyClassifiers/SVC.h +++ b/src/PyClassifiers/SVC.h @@ -7,7 +7,7 @@ namespace pywrap { public: SVC() : PyClassifier("sklearn.svm", "SVC", true) {}; ~SVC() = default; - void setHyperparameters(nlohmann::json& hyperparameters) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; }; } /* namespace pywrap */