Add hyperparameters management in experiments

This commit is contained in:
2023-08-20 17:57:38 +02:00
parent 7a6ec73d63
commit 4964aab722
17 changed files with 141 additions and 117 deletions

View File

@@ -25,6 +25,7 @@ namespace platform {
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
string Experiment::get_file_name()
{
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
@@ -124,6 +125,8 @@ namespace platform {
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters);
// Initialize results vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
@@ -144,6 +147,10 @@ namespace platform {
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) {
clf->setHyperparameters(hyperparameters);
}
// Split train - test dataset
train_timer.start();
auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train);
@@ -153,12 +160,14 @@ namespace platform {
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
cout << nfold + 1 << ", " << flush;
// Train model
clf->fit(X_train, y_train, features, className, states);
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
auto accuracy_train_value = clf->score(X_train, y_train);
// Test model
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();

View File

@@ -29,7 +29,8 @@ namespace platform {
};
class Result {
private:
string dataset, hyperparameters, model_version;
string dataset, model_version;
json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
@@ -37,7 +38,7 @@ namespace platform {
public:
Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const string& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; }
Result& setClasses(int classes) { this->classes = classes; return *this; }
@@ -59,7 +60,7 @@ namespace platform {
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; }
const string& getHyperparameters() const { return hyperparameters; }
const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; }
const int getFeatures() const { return features; }
const int getClasses() const { return classes; }
@@ -85,11 +86,12 @@ namespace platform {
bool discretized{ false }, stratified{ false };
vector<Result> results;
vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public:
Experiment() = default;
Experiment();
Experiment& setTitle(const string& title) { this->title = title; return *this; }
Experiment& setModel(const string& model) { this->model = model; return *this; }
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
@@ -103,6 +105,7 @@ namespace platform {
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
string get_file_name();
void save(const string& path);
void cross_validation(const string& path, const string& fileName);

View File

@@ -1,5 +1,6 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "platformUtils.h"
#include "Experiment.h"
#include "Datasets.h"
@@ -10,12 +11,14 @@
using namespace std;
using json = nlohmann::json;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
auto env = platform::DotEnv();
argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
program.add_argument("-p", "--path")
.help("folder where the data files are located, default")
.default_value(string{ platform::Paths::datasets() });
@@ -59,6 +62,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
auto seeds = program.get<vector<int>>("seeds");
auto complete_file_name = path + file_name + ".arff";
auto title = program.get<string>("title");
auto hyperparameters = program.get<string>("hyperparameters");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
}
@@ -82,6 +86,7 @@ int main(int argc, char** argv)
auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds");
auto hyperparameters =program.get<string>("hyperparameters");
vector<string> filesToTest;
auto datasets = platform::Datasets(path, true, platform::ARFF);
auto title = program.get<string>("title");
@@ -106,6 +111,7 @@ int main(int argc, char** argv)
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(json::parse(hyperparameters));
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}