Implement hyperparameters with json file

This commit is contained in:
Ricardo Montañana Gómez 2023-11-18 11:56:10 +01:00
parent 28f3d87e32
commit 89c4613591
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
25 changed files with 112 additions and 44 deletions

View File

@ -29,7 +29,7 @@ namespace bayesnet {
virtual std::string getVersion() = 0;
std::vector<std::string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
};
}
#endif

View File

@ -43,7 +43,7 @@ namespace bayesnet {
y_train = y_;
}
}
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" };

View File

@ -8,9 +8,9 @@ namespace bayesnet {
class BoostAODE : public Ensemble {
public:
BoostAODE();
virtual ~BoostAODE() {};
virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;

View File

@ -153,7 +153,7 @@ namespace bayesnet {
{
model.dump_cpt();
}
void Classifier::checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters)
void Classifier::checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
@ -161,7 +161,7 @@ namespace bayesnet {
}
}
}
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const std::vector<std::string> validKeys = { };

View File

@ -22,7 +22,7 @@ namespace bayesnet {
void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters);
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y);
public:
Classifier(Network model);
@ -44,7 +44,7 @@ namespace bayesnet {
std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
}
#endif

View File

@ -2,7 +2,7 @@
namespace bayesnet {
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::setHyperparameters(nlohmann::json& hyperparameters)
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "k", "theta" };

View File

@ -13,8 +13,8 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {};
void setHyperparameters(nlohmann::json& hyperparameters) override;
virtual ~KDB() = default;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
};
}

View File

@ -10,7 +10,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
explicit SPODE(int root);
virtual ~SPODE() {};
virtual ~SPODE() = default;
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
};
}

View File

@ -8,7 +8,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
TAN();
virtual ~TAN() {};
virtual ~TAN() = default;
std::vector<std::string> graph(const std::string& name = "TAN") const override;
};
}

View File

@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
include_directories(${Python3_INCLUDE_DIRS})
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)

View File

@ -26,7 +26,6 @@ namespace platform {
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
std::string Experiment::get_file_name()
{
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
@ -148,7 +147,7 @@ namespace platform {
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters);
result.setHyperparameters(hyperparameters.get(fileName));
// Initialize results std::vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
@ -171,8 +170,8 @@ namespace platform {
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) {
clf->setHyperparameters(hyperparameters);
if (hyperparameters.notEmpty(fileName)) {
clf->setHyperparameters(hyperparameters.get(fileName));
}
// Split train - test dataset
train_timer.start();

View File

@ -6,6 +6,7 @@
#include <chrono>
#include "Folding.h"
#include "BaseClassifier.h"
#include "HyperParameters.h"
#include "TAN.h"
#include "KDB.h"
#include "AODE.h"
@ -80,17 +81,8 @@ namespace platform {
const std::vector<double>& getTimesTest() const { return times_test; }
};
class Experiment {
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public:
Experiment();
Experiment() = default;
Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
Experiment& setModel(const std::string& model) { this->model = model; return *this; }
Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
@ -104,13 +96,22 @@ namespace platform {
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
std::string get_file_name();
void save(const std::string& path);
void cross_validation(const std::string& fileName, bool quiet);
void go(std::vector<std::string> filesToProcess, bool quiet);
void show();
void report();
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
HyperParameters hyperparameters;
int nfolds{ 0 };
float duration{ 0 };
json build_json();
};
}
#endif

View File

@ -0,0 +1,33 @@
#include "HyperParameters.h"
#include <fstream>
namespace platform {
/// Builds the table so that every dataset named in `datasets` is associated
/// with its own copy of the same hyperparameter set `hyperparameters_`.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
    // operator[] creates the entry when missing and overwrites it when a name is repeated
    for (auto name = datasets.cbegin(); name != datasets.cend(); ++name) {
        hyperparameters[*name] = hyperparameters_;
    }
}
/// Loads per-dataset hyperparameters from the json file `hyperparameters_file`.
/// Throws std::runtime_error when the file cannot be opened or when a name in
/// `datasets` is not a key of the parsed document; malformed content is
/// reported by json::parse itself.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
    std::ifstream input(hyperparameters_file);
    // A stream that could not be opened is reported as a missing file
    if (!input.is_open()) {
        throw std::runtime_error("File " + hyperparameters_file + " not found");
    }
    // Read the whole stream as a single json document
    json parsed = json::parse(input);
    // Every requested dataset needs its own entry; stop at the first one without it
    for (auto it = datasets.cbegin(); it != datasets.cend(); ++it) {
        const std::string& name = *it;
        if (parsed.contains(name)) {
            hyperparameters[name] = parsed[name];
            continue;
        }
        throw std::runtime_error("Dataset " + name + " not found in hyperparameters file");
    }
}
/// Returns a copy of the hyperparameters stored under `key`.
/// std::map::at throws std::out_of_range when `key` has no entry.
json HyperParameters::get(const std::string& key)
{
    const json& stored = hyperparameters.at(key);
    return stored;
}
} /* namespace platform */

View File

@ -0,0 +1,22 @@
#ifndef HYPERPARAMETERS_H
#define HYPERPARAMETERS_H
#include <string>
#include <map>
#include <vector>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
/// Per-dataset hyperparameter table: maps a dataset name to the json value
/// holding the hyperparameters to use with that dataset.
class HyperParameters {
public:
    /// Creates an empty table (no dataset registered).
    HyperParameters() = default;
    /// Registers every name in `datasets` with a copy of `hyperparameters_`.
    explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
    /// Reads the table from the json file `hyperparameters_file`; throws
    /// std::runtime_error if the file cannot be opened or if a name in
    /// `datasets` has no entry in it.
    explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
    // No user-declared destructor (Rule of Zero): declaring one, even defaulted,
    // suppresses the implicit move operations and forces the map to be copied
    // when a temporary HyperParameters is assigned.
    /// True when the entry stored under `key` holds at least one hyperparameter.
    /// The entry is tested with json::empty() rather than compared with json():
    /// json() is null, so an empty object such as the parsed "{}" would compare
    /// as different from it and be reported as not empty.
    /// Throws std::out_of_range (std::map::at) when `key` has no entry.
    bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
    /// Returns a copy of the hyperparameters stored under `key`.
    json get(const std::string& key);
private:
    // dataset name -> hyperparameters for that dataset
    std::map<std::string, json> hyperparameters;
};
} /* namespace platform */
#endif /* HYPERPARAMETERS_H */

View File

@ -16,7 +16,9 @@ argparse::ArgumentParser manageArguments()
auto env = platform::DotEnv();
argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \
"Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) {
@ -53,7 +55,7 @@ argparse::ArgumentParser manageArguments()
int main(int argc, char** argv)
{
std::string file_name, model_name, title;
std::string file_name, model_name, title, hyperparameters_file;
json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet;
std::vector<int> seeds;
@ -71,6 +73,10 @@ int main(int argc, char** argv)
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file");
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
title = program.get<std::string>("title");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
@ -96,15 +102,22 @@ int main(int argc, char** argv)
filesToTest = datasets.getNames();
saveResults = true;
}
platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
/*
* Begin Processing
*/
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(hyperparameters_json);
experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}

View File

@ -5,7 +5,7 @@ namespace pywrap {
{
return callMethodString("graph");
}
void ODTE::setHyperparameters(nlohmann::json& hyperparameters)
void ODTE::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "n_jobs", "n_estimators", "random_state" };

View File

@ -9,7 +9,7 @@ namespace pywrap {
ODTE() : PyClassifier("odte", "Odte") {};
~ODTE() = default;
std::string graph();
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* ODTE_H */

View File

@ -81,7 +81,7 @@ namespace pywrap {
float result = pyWrap->score(id, Xp, yp);
return result;
}
void PyClassifier::setHyperparameters(nlohmann::json& hyperparameters)
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const std::vector<std::string> validKeys = { };

View File

@ -27,7 +27,6 @@ namespace pywrap {
std::vector<int> predict(std::vector<std::vector<int >>& X) override { return std::vector<int>(); };
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override { return 0.0; };
float score(torch::Tensor& X, torch::Tensor& y) override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
std::string version();
std::string callMethodString(const std::string& method);
std::string getVersion() override { return this->version(); };
@ -39,6 +38,7 @@ namespace pywrap {
bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; };
std::vector<std::string> topological_order() override { return std::vector<std::string>(); }
void dump_cpt() const override {};
void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected:
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters);
nlohmann::json hyperparameters;

View File

@ -1,7 +1,7 @@
#include "RandomForest.h"
namespace pywrap {
void RandomForest::setHyperparameters(nlohmann::json& hyperparameters)
void RandomForest::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "n_estimators", "n_jobs", "random_state" };

View File

@ -7,7 +7,7 @@ namespace pywrap {
public:
RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier", true) {};
~RandomForest() = default;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* RANDOMFOREST_H */

View File

@ -5,7 +5,7 @@ namespace pywrap {
{
return callMethodString("graph");
}
void STree::setHyperparameters(nlohmann::json& hyperparameters)
void STree::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "C", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy" };

View File

@ -9,7 +9,7 @@ namespace pywrap {
STree() : PyClassifier("stree", "Stree") {};
~STree() = default;
std::string graph();
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* STREE_H */

View File

@ -1,7 +1,7 @@
#include "SVC.h"
namespace pywrap {
void SVC::setHyperparameters(nlohmann::json& hyperparameters)
void SVC::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "C", "gamma", "kernel", "random_state" };

View File

@ -7,7 +7,7 @@ namespace pywrap {
public:
SVC() : PyClassifier("sklearn.svm", "SVC", true) {};
~SVC() = default;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */