Add hyperparameters management in experiments
This commit is contained in:
@@ -25,6 +25,7 @@ namespace platform {
|
||||
oss << std::put_time(timeinfo, "%H:%M:%S");
|
||||
return oss.str();
|
||||
}
|
||||
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
|
||||
string Experiment::get_file_name()
|
||||
{
|
||||
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
|
||||
@@ -124,6 +125,8 @@ namespace platform {
|
||||
auto result = Result();
|
||||
auto [values, counts] = at::_unique(y);
|
||||
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
|
||||
result.setHyperparameters(hyperparameters);
|
||||
// Initialize results vectors
|
||||
int nResults = nfolds * static_cast<int>(randomSeeds.size());
|
||||
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||
@@ -144,6 +147,10 @@ namespace platform {
|
||||
for (int nfold = 0; nfold < nfolds; nfold++) {
|
||||
auto clf = Models::instance()->create(model);
|
||||
setModelVersion(clf->getVersion());
|
||||
if (hyperparameters.size() != 0) {
|
||||
clf->setHyperparameters(hyperparameters);
|
||||
}
|
||||
// Split train - test dataset
|
||||
train_timer.start();
|
||||
auto [train, test] = fold->getFold(nfold);
|
||||
auto train_t = torch::tensor(train);
|
||||
@@ -153,12 +160,14 @@ namespace platform {
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
cout << nfold + 1 << ", " << flush;
|
||||
// Train model
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
nodes[item] = clf->getNumberOfNodes();
|
||||
edges[item] = clf->getNumberOfEdges();
|
||||
num_states[item] = clf->getNumberOfStates();
|
||||
train_time[item] = train_timer.getDuration();
|
||||
auto accuracy_train_value = clf->score(X_train, y_train);
|
||||
// Test model
|
||||
test_timer.start();
|
||||
auto accuracy_test_value = clf->score(X_test, y_test);
|
||||
test_time[item] = test_timer.getDuration();
|
||||
|
@@ -29,7 +29,8 @@ namespace platform {
|
||||
};
|
||||
class Result {
|
||||
private:
|
||||
string dataset, hyperparameters, model_version;
|
||||
string dataset, model_version;
|
||||
json hyperparameters;
|
||||
int samples{ 0 }, features{ 0 }, classes{ 0 };
|
||||
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
|
||||
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
|
||||
@@ -37,7 +38,7 @@ namespace platform {
|
||||
public:
|
||||
Result() = default;
|
||||
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
|
||||
Result& setHyperparameters(const string& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
|
||||
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
|
||||
Result& setSamples(int samples) { this->samples = samples; return *this; }
|
||||
Result& setFeatures(int features) { this->features = features; return *this; }
|
||||
Result& setClasses(int classes) { this->classes = classes; return *this; }
|
||||
@@ -59,7 +60,7 @@ namespace platform {
|
||||
const float get_score_train() const { return score_train; }
|
||||
float get_score_test() { return score_test; }
|
||||
const string& getDataset() const { return dataset; }
|
||||
const string& getHyperparameters() const { return hyperparameters; }
|
||||
const json& getHyperparameters() const { return hyperparameters; }
|
||||
const int getSamples() const { return samples; }
|
||||
const int getFeatures() const { return features; }
|
||||
const int getClasses() const { return classes; }
|
||||
@@ -85,11 +86,12 @@ namespace platform {
|
||||
bool discretized{ false }, stratified{ false };
|
||||
vector<Result> results;
|
||||
vector<int> randomSeeds;
|
||||
json hyperparameters = "{}";
|
||||
int nfolds{ 0 };
|
||||
float duration{ 0 };
|
||||
json build_json();
|
||||
public:
|
||||
Experiment() = default;
|
||||
Experiment();
|
||||
Experiment& setTitle(const string& title) { this->title = title; return *this; }
|
||||
Experiment& setModel(const string& model) { this->model = model; return *this; }
|
||||
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
|
||||
@@ -103,6 +105,7 @@ namespace platform {
|
||||
Experiment& addResult(Result result) { results.push_back(result); return *this; }
|
||||
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
|
||||
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
|
||||
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
|
||||
string get_file_name();
|
||||
void save(const string& path);
|
||||
void cross_validation(const string& path, const string& fileName);
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#include <iostream>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "platformUtils.h"
|
||||
#include "Experiment.h"
|
||||
#include "Datasets.h"
|
||||
@@ -10,12 +11,14 @@
|
||||
|
||||
|
||||
using namespace std;
|
||||
using json = nlohmann::json;
|
||||
|
||||
argparse::ArgumentParser manageArguments(int argc, char** argv)
|
||||
{
|
||||
auto env = platform::DotEnv();
|
||||
argparse::ArgumentParser program("main");
|
||||
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
|
||||
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
|
||||
program.add_argument("-p", "--path")
|
||||
.help("folder where the data files are located, default")
|
||||
.default_value(string{ platform::Paths::datasets() });
|
||||
@@ -59,6 +62,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
|
||||
auto seeds = program.get<vector<int>>("seeds");
|
||||
auto complete_file_name = path + file_name + ".arff";
|
||||
auto title = program.get<string>("title");
|
||||
auto hyperparameters = program.get<string>("hyperparameters");
|
||||
if (title == "" && file_name == "") {
|
||||
throw runtime_error("title is mandatory if dataset is not provided");
|
||||
}
|
||||
@@ -82,6 +86,7 @@ int main(int argc, char** argv)
|
||||
auto stratified = program.get<bool>("stratified");
|
||||
auto n_folds = program.get<int>("folds");
|
||||
auto seeds = program.get<vector<int>>("seeds");
|
||||
auto hyperparameters =program.get<string>("hyperparameters");
|
||||
vector<string> filesToTest;
|
||||
auto datasets = platform::Datasets(path, true, platform::ARFF);
|
||||
auto title = program.get<string>("title");
|
||||
@@ -106,6 +111,7 @@ int main(int argc, char** argv)
|
||||
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
|
||||
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
|
||||
experiment.setHyperparameters(json::parse(hyperparameters));
|
||||
for (auto seed : seeds) {
|
||||
experiment.addRandomSeed(seed);
|
||||
}
|
||||
|
Reference in New Issue
Block a user