Begin b_list excel
This commit is contained in:
175
src/main/Experiment.cc
Normal file
175
src/main/Experiment.cc
Normal file
@@ -0,0 +1,175 @@
|
||||
#include "Experiment.h"
#include <memory>
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include "Paths.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
|
||||
void Experiment::saveResult()
|
||||
{
|
||||
result.save();
|
||||
}
|
||||
void Experiment::report()
|
||||
{
|
||||
ReportConsole report(result.getJson());
|
||||
report.show();
|
||||
}
|
||||
void Experiment::show()
|
||||
{
|
||||
std::cout << result.getJson().dump(4) << std::endl;
|
||||
}
|
||||
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score)
|
||||
{
|
||||
for (auto fileName : filesToProcess) {
|
||||
if (fileName.size() > max_name)
|
||||
max_name = fileName.size();
|
||||
}
|
||||
std::cout << Colors::MAGENTA() << "*** Starting experiment: " << result.getTitle() << " ***" << Colors::RESET() << std::endl << std::endl;
|
||||
if (!quiet) {
|
||||
std::cout << Colors::GREEN() << " Status Meaning" << std::endl;
|
||||
std::cout << " ------ --------------------------------" << Colors::RESET() << std::endl;
|
||||
std::cout << " ( " << Colors::GREEN() << "a" << Colors::RESET() << " ) Fitting model with train dataset" << std::endl;
|
||||
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
|
||||
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
|
||||
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
|
||||
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl;
|
||||
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl;
|
||||
}
|
||||
int num = 0;
|
||||
for (auto fileName : filesToProcess) {
|
||||
if (!quiet)
|
||||
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
|
||||
cross_validation(fileName, quiet, no_train_score);
|
||||
if (!quiet)
|
||||
std::cout << std::endl;
|
||||
}
|
||||
if (!quiet)
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::string getColor(bayesnet::status_t status)
|
||||
{
|
||||
switch (status) {
|
||||
case bayesnet::NORMAL:
|
||||
return Colors::GREEN();
|
||||
case bayesnet::WARNING:
|
||||
return Colors::YELLOW();
|
||||
case bayesnet::ERROR:
|
||||
return Colors::RED();
|
||||
default:
|
||||
return Colors::RESET();
|
||||
}
|
||||
}
|
||||
|
||||
void showProgress(int fold, const std::string& color, const std::string& phase)
|
||||
{
|
||||
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
|
||||
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
|
||||
|
||||
}
|
||||
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score)
|
||||
{
|
||||
auto datasets = Datasets(discretized, Paths::datasets());
|
||||
// Get dataset
|
||||
auto [X, y] = datasets.getTensors(fileName);
|
||||
auto states = datasets.getStates(fileName);
|
||||
auto features = datasets.getFeatures(fileName);
|
||||
auto samples = datasets.getNSamples(fileName);
|
||||
auto className = datasets.getClassName(fileName);
|
||||
if (!quiet) {
|
||||
std::cout << " " << setw(5) << samples << " " << setw(5) << features.size() << flush;
|
||||
}
|
||||
// Prepare Result
|
||||
auto partial_result = PartialResult();
|
||||
auto [values, counts] = at::_unique(y);
|
||||
partial_result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
|
||||
partial_result.setHyperparameters(hyperparameters.get(fileName));
|
||||
// Initialize results std::vectors
|
||||
int nResults = nfolds * static_cast<int>(randomSeeds.size());
|
||||
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto edges = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
|
||||
std::vector<std::string> notes;
|
||||
Timer train_timer, test_timer;
|
||||
int item = 0;
|
||||
bool first_seed = true;
|
||||
for (auto seed : randomSeeds) {
|
||||
if (!quiet) {
|
||||
string prefix = " ";
|
||||
if (!first_seed) {
|
||||
prefix = "\n" + string(18 + max_name, ' ');
|
||||
}
|
||||
std::cout << prefix << setw(4) << right << seed << " " << flush;
|
||||
first_seed = false;
|
||||
}
|
||||
folding::Fold* fold;
|
||||
if (stratified)
|
||||
fold = new folding::StratifiedKFold(nfolds, y, seed);
|
||||
else
|
||||
fold = new folding::KFold(nfolds, y.size(0), seed);
|
||||
for (int nfold = 0; nfold < nfolds; nfold++) {
|
||||
auto clf = Models::instance()->create(result.getModel());
|
||||
setModelVersion(clf->getVersion());
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, fileName);
|
||||
clf->setHyperparameters(hyperparameters.get(fileName));
|
||||
// Split train - test dataset
|
||||
train_timer.start();
|
||||
auto [train, test] = fold->getFold(nfold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
|
||||
// Train model
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
|
||||
auto clf_notes = clf->getNotes();
|
||||
std::transform(clf_notes.begin(), clf_notes.end(), std::back_inserter(notes), [nfold](const std::string& note)
|
||||
{ return "Fold " + std::to_string(nfold) + ": " + note; });
|
||||
nodes[item] = clf->getNumberOfNodes();
|
||||
edges[item] = clf->getNumberOfEdges();
|
||||
num_states[item] = clf->getNumberOfStates();
|
||||
train_time[item] = train_timer.getDuration();
|
||||
double accuracy_train_value = 0.0;
|
||||
// Score train
|
||||
if (!no_train_score)
|
||||
accuracy_train_value = clf->score(X_train, y_train);
|
||||
// Test model
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
|
||||
test_timer.start();
|
||||
auto accuracy_test_value = clf->score(X_test, y_test);
|
||||
test_time[item] = test_timer.getDuration();
|
||||
accuracy_train[item] = accuracy_train_value;
|
||||
accuracy_test[item] = accuracy_test_value;
|
||||
if (!quiet)
|
||||
std::cout << "\b\b\b, " << flush;
|
||||
// Store results and times in std::vector
|
||||
partial_result.addScoreTrain(accuracy_train_value);
|
||||
partial_result.addScoreTest(accuracy_test_value);
|
||||
partial_result.addTimeTrain(train_time[item].item<double>());
|
||||
partial_result.addTimeTest(test_time[item].item<double>());
|
||||
item++;
|
||||
}
|
||||
if (!quiet)
|
||||
std::cout << "end. " << flush;
|
||||
delete fold;
|
||||
}
|
||||
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
||||
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
||||
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||
partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
|
||||
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
|
||||
partial_result.setDataset(fileName).setNotes(notes);
|
||||
addResult(partial_result);
|
||||
}
|
||||
}
|
46
src/main/Experiment.h
Normal file
46
src/main/Experiment.h
Normal file
@@ -0,0 +1,46 @@
|
||||
#ifndef EXPERIMENT_H
|
||||
#define EXPERIMENT_H
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
#include "folding.hpp"
|
||||
#include "BaseClassifier.h"
|
||||
#include "HyperParameters.h"
|
||||
#include "Result.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
|
||||
class Experiment {
|
||||
public:
|
||||
Experiment() = default;
|
||||
Experiment& setPlatform(const std::string& platform) { this->result.setPlatform(platform); return *this; }
|
||||
Experiment& setScoreName(const std::string& score_name) { this->result.setScoreName(score_name); return *this; }
|
||||
Experiment& setTitle(const std::string& title) { this->result.setTitle(title); return *this; }
|
||||
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
|
||||
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
|
||||
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
|
||||
Experiment& setLanguageVersion(const std::string& language_version) { this->result.setLanguageVersion(language_version); return *this; }
|
||||
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; result.setDiscretized(discretized); return *this; }
|
||||
Experiment& setStratified(bool stratified) { this->stratified = stratified; result.setStratified(stratified); return *this; }
|
||||
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; result.setNFolds(nfolds); return *this; }
|
||||
Experiment& addResult(PartialResult result_) { result.addPartial(result_); return *this; }
|
||||
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; }
|
||||
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
|
||||
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
|
||||
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score);
|
||||
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score);
|
||||
void saveResult();
|
||||
void show();
|
||||
void report();
|
||||
private:
|
||||
Result result;
|
||||
bool discretized{ false }, stratified{ false };
|
||||
std::vector<PartialResult> results;
|
||||
std::vector<int> randomSeeds;
|
||||
HyperParameters hyperparameters;
|
||||
int nfolds{ 0 };
|
||||
int max_name{ 7 }; // max length of dataset name for formatting (default 7)
|
||||
};
|
||||
}
|
||||
#endif
|
56
src/main/HyperParameters.cc
Normal file
56
src/main/HyperParameters.cc
Normal file
@@ -0,0 +1,56 @@
|
||||
#include "HyperParameters.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
namespace platform {
|
||||
/// Builds the table by giving every listed dataset the same hyperparameters.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
    for (auto name = datasets.begin(); name != datasets.end(); ++name) {
        hyperparameters[*name] = hyperparameters_;
    }
}
|
||||
// Joins `strings` into one string with `delim` placed between consecutive
// elements only (no leading or trailing delimiter). An empty vector yields an
// empty string. The previous ostream_iterator version, adapted from
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
// also emitted the delimiter after the last element.
std::string join(std::vector<std::string> const& strings, std::string delim)
{
    std::string joined;
    for (std::size_t i = 0; i < strings.size(); ++i) {
        if (i != 0)
            joined += delim;
        joined += strings[i];
    }
    return joined;
}
|
||||
/// Loads per-dataset hyperparameters from a JSON file.
///
/// The file's "results" entry is looked up by dataset name; for each dataset
/// found, its "hyperparameters" value is stored. Datasets missing from the file
/// get an empty set and a warning on stderr.
///
/// @throws std::runtime_error if the file cannot be opened.
///         json::parse reports malformed content with its own exception.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
    std::ifstream input(hyperparameters_file);
    if (!input.is_open()) {
        throw std::runtime_error("File " + hyperparameters_file + " not found");
    }
    json parsed = json::parse(input);
    auto per_dataset = parsed["results"];
    for (const auto& dataset : datasets) {
        if (per_dataset.contains(dataset)) {
            hyperparameters[dataset] = per_dataset[dataset]["hyperparameters"].get<json>();
        } else {
            std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl;
            hyperparameters[dataset] = json({});
        }
    }
}
|
||||
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
|
||||
{
|
||||
json result = hyperparameters.at(fileName);
|
||||
for (const auto& item : result.items()) {
|
||||
if (find(valid.begin(), valid.end(), item.key()) == valid.end()) {
|
||||
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: "
|
||||
+ result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}");
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Returns a copy of the hyperparameters stored for `fileName`
/// (std::map::at throws if the dataset has no entry).
json HyperParameters::get(const std::string& fileName)
{
    const json& stored = hyperparameters.at(fileName);
    return stored;
}
|
||||
} /* namespace platform */
|
23
src/main/HyperParameters.h
Normal file
23
src/main/HyperParameters.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef HYPERPARAMETERS_H
|
||||
#define HYPERPARAMETERS_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
class HyperParameters {
|
||||
public:
|
||||
HyperParameters() = default;
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
|
||||
~HyperParameters() = default;
|
||||
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
|
||||
void check(const std::vector<std::string>& valid, const std::string& fileName);
|
||||
json get(const std::string& fileName);
|
||||
private:
|
||||
std::map<std::string, json> hyperparameters;
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif /* HYPERPARAMETERS_H */
|
52
src/main/Models.cc
Normal file
52
src/main/Models.cc
Normal file
@@ -0,0 +1,52 @@
|
||||
#include "Models.h"
|
||||
namespace platform {
|
||||
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
|
||||
// Singleton storage; stays null until instance() is called for the first time.
Models* Models::factory = nullptr;

/// Returns the single Models registry, creating it on first use.
Models* Models::instance()
{
    if (!factory) {
        factory = new Models();
    }
    return factory;
}
|
||||
void Models::registerFactoryFunction(const std::string& name,
|
||||
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
|
||||
{
|
||||
// register the class factory function
|
||||
functionRegistry[name] = classFactoryFunction;
|
||||
}
|
||||
shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
|
||||
{
|
||||
bayesnet::BaseClassifier* instance = nullptr;
|
||||
|
||||
// find name in the registry and call factory method.
|
||||
auto it = functionRegistry.find(name);
|
||||
if (it != functionRegistry.end())
|
||||
instance = it->second();
|
||||
// wrap instance in a shared ptr and return
|
||||
if (instance != nullptr)
|
||||
return unique_ptr<bayesnet::BaseClassifier>(instance);
|
||||
else
|
||||
throw std::runtime_error("Model not found: " + name);
|
||||
}
|
||||
std::vector<std::string> Models::getNames()
|
||||
{
|
||||
std::vector<std::string> names;
|
||||
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
|
||||
[](const pair<std::string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
|
||||
return names;
|
||||
}
|
||||
std::string Models::tostring()
|
||||
{
|
||||
std::string result = "";
|
||||
for (const auto& pair : functionRegistry) {
|
||||
result += pair.first + ", ";
|
||||
}
|
||||
return "{" + result.substr(0, result.size() - 2) + "}";
|
||||
}
|
||||
/// Registers `classFactoryFunction` under `name` in the Models singleton.
Registrar::Registrar(const std::string& name, std::function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
    Models* registry = Models::instance();
    registry->registerFactoryFunction(name, classFactoryFunction);
}
|
||||
}
|
42
src/main/Models.h
Normal file
42
src/main/Models.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef MODELS_H
|
||||
#define MODELS_H
|
||||
#include <map>
|
||||
#include "BaseClassifier.h"
|
||||
#include "AODE.h"
|
||||
#include "TAN.h"
|
||||
#include "KDB.h"
|
||||
#include "SPODE.h"
|
||||
#include "TANLd.h"
|
||||
#include "KDBLd.h"
|
||||
#include "SPODELd.h"
|
||||
#include "AODELd.h"
|
||||
#include "BoostAODE.h"
|
||||
#include "STree.h"
|
||||
#include "ODTE.h"
|
||||
#include "SVC.h"
|
||||
#include "XGBoost.h"
|
||||
#include "RandomForest.h"
|
||||
namespace platform {
|
||||
class Models {
|
||||
private:
|
||||
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
|
||||
static Models* factory; //singleton
|
||||
Models() {};
|
||||
public:
|
||||
Models(Models&) = delete;
|
||||
void operator=(const Models&) = delete;
|
||||
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
|
||||
static Models* instance();
|
||||
shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
|
||||
void registerFactoryFunction(const std::string& name,
|
||||
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
|
||||
std::vector<string> getNames();
|
||||
std::string tostring();
|
||||
|
||||
};
|
||||
class Registrar {
|
||||
public:
|
||||
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
|
||||
};
|
||||
}
|
||||
#endif
|
73
src/main/PartialResult.h
Normal file
73
src/main/PartialResult.h
Normal file
@@ -0,0 +1,73 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
class PartialResult {
|
||||
|
||||
public:
|
||||
PartialResult()
|
||||
{
|
||||
data["scores_train"] = json::array();
|
||||
data["scores_test"] = json::array();
|
||||
data["times_train"] = json::array();
|
||||
data["times_test"] = json::array();
|
||||
data["notes"] = json::array();
|
||||
data["train_time"] = 0.0;
|
||||
data["train_time_std"] = 0.0;
|
||||
data["test_time"] = 0.0;
|
||||
data["test_time_std"] = 0.0;
|
||||
};
|
||||
PartialResult& setDataset(const std::string& dataset) { data["dataset"] = dataset; return *this; }
|
||||
PartialResult& setNotes(const std::vector<std::string>& notes)
|
||||
{
|
||||
json notes_ = notes;
|
||||
data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end());
|
||||
return *this;
|
||||
}
|
||||
PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; }
|
||||
PartialResult& setSamples(int samples) { data["samples"] = samples; return *this; }
|
||||
PartialResult& setFeatures(int features) { data["features"] = features; return *this; }
|
||||
PartialResult& setClasses(int classes) { data["classes"] = classes; return *this; }
|
||||
PartialResult& setScoreTrain(double score) { data["score_train"] = score; return *this; }
|
||||
PartialResult& setScoreTrainStd(double score_std) { data["score_train_std"] = score_std; return *this; }
|
||||
PartialResult& setScoreTest(double score) { data["score"] = score; return *this; }
|
||||
PartialResult& setScoreTestStd(double score_std) { data["score_std"] = score_std; return *this; }
|
||||
PartialResult& setTrainTime(double train_time)
|
||||
{
|
||||
data["train_time"] = train_time;
|
||||
data["time"] = data["test_time"].get<double>() + data["train_time"].get<double>();
|
||||
return *this;
|
||||
}
|
||||
PartialResult& setTrainTimeStd(double train_time_std)
|
||||
{
|
||||
data["train_time_std"] = train_time_std;
|
||||
data["time_std"] = data["test_time_std"].get<double>() + data["train_time_std"].get<double>();
|
||||
return *this;
|
||||
}
|
||||
PartialResult& setTestTime(double test_time)
|
||||
{
|
||||
data["test_time"] = test_time;
|
||||
data["time"] = data["test_time"].get<double>() + data["train_time"].get<double>();
|
||||
return *this;
|
||||
}
|
||||
PartialResult& setTestTimeStd(double test_time_std)
|
||||
{
|
||||
data["test_time_std"] = test_time_std;
|
||||
data["time_std"] = data["test_time_std"].get<double>() + data["train_time_std"].get<double>();
|
||||
return *this;
|
||||
}
|
||||
PartialResult& setNodes(float nodes) { data["nodes"] = nodes; return *this; }
|
||||
PartialResult& setLeaves(float leaves) { data["leaves"] = leaves; return *this; }
|
||||
PartialResult& setDepth(float depth) { data["depth"] = depth; return *this; }
|
||||
PartialResult& addScoreTrain(double score) { data["scores_train"].push_back(score); return *this; }
|
||||
PartialResult& addScoreTest(double score) { data["scores_test"].push_back(score); return *this; }
|
||||
PartialResult& addTimeTrain(double time) { data["times_train"].push_back(time); return *this; }
|
||||
PartialResult& addTimeTest(double time) { data["times_test"].push_back(time); return *this; }
|
||||
json getJson() const { return data; }
|
||||
private:
|
||||
json data;
|
||||
};
|
||||
}
|
98
src/main/Result.cc
Normal file
98
src/main/Result.cc
Normal file
@@ -0,0 +1,98 @@
|
||||
#include "Result.h"
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "BestScore.h"
|
||||
#include "Colors.h"
|
||||
#include "DotEnv.h"
|
||||
#include "CLocale.h"
|
||||
#include "Paths.h"
|
||||
|
||||
namespace platform {
|
||||
// Formats the current local time using a std::put_time pattern.
// Shared by get_actual_date() and get_actual_time(), which previously
// duplicated this code and differed only in the pattern.
static std::string format_current_time(const char* pattern)
{
    const std::time_t now = std::time(nullptr);
    std::ostringstream oss;
    oss << std::put_time(std::localtime(&now), pattern);
    return oss.str();
}

/// Current local date as "YYYY-MM-DD".
std::string get_actual_date()
{
    return format_current_time("%Y-%m-%d");
}

/// Current local time as "HH:MM:SS".
std::string get_actual_time()
{
    return format_current_time("%H:%M:%S");
}
|
||||
/// Stamps the result with the current date and time and starts with empty
/// "results" and "seeds" arrays.
Result::Result()
{
    const std::string today = get_actual_date();
    const std::string now = get_actual_time();
    data["date"] = today;
    data["time"] = now;
    data["results"] = json::array();
    data["seeds"] = json::array();
}
|
||||
|
||||
/// Loads a stored result from `path`/`fileName` and derives its summary values.
///
/// `score` becomes the sum of the per-dataset "score" entries, divided by the
/// reference value from BestScore when one exists for the score name.
/// `complete` is set when the file holds more than one partial result.
///
/// @throws std::invalid_argument if the file cannot be opened.
/// @return *this, to allow chaining.
Result& Result::load(const std::string& path, const std::string& fileName)
{
    const std::string fullPath = path + "/" + fileName;
    std::ifstream resultData(fullPath);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fullPath + "]");
    }
    data = json::parse(resultData);
    score = 0;
    for (const auto& partial : data["results"]) {
        score += partial["score"].get<double>();
    }
    auto metric = data["score_name"];
    auto reference = BestScore::getScore(metric);
    if (reference.first != "") {
        score /= reference.second;
    }
    complete = data["results"].size() > 1;
    return *this;
}
|
||||
/// Returns a copy of the whole result document.
json Result::getJson()
{
    json snapshot = data;
    return snapshot;
}
|
||||
|
||||
void Result::save()
|
||||
{
|
||||
std::ofstream file(Paths::results() + "/" + getFilename());
|
||||
file << data;
|
||||
file.close();
|
||||
}
|
||||
std::string Result::getFilename() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "results_" << data.at("score_name").get<std::string>() << "_" << data.at("model").get<std::string>() << "_"
|
||||
<< data.at("platform").get<std::string>() << "_" << data["date"].get<std::string>() << "_"
|
||||
<< data["time"].get<std::string>() << "_" << (data["stratified"] ? "1" : "0") << ".json";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
||||
std::string Result::to_string(int maxModel) const
|
||||
{
|
||||
auto tmp = ConfigLocale();
|
||||
std::stringstream oss;
|
||||
auto duration = data["duration"].get<double>();
|
||||
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
|
||||
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
|
||||
oss << data["date"].get<std::string>() << " ";
|
||||
oss << std::setw(maxModel) << std::left << data["model"].get<std::string>() << " ";
|
||||
oss << std::setw(11) << std::left << data["score_name"].get<std::string>() << " ";
|
||||
oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
|
||||
auto completeString = isComplete() ? "C" : "P";
|
||||
oss << std::setw(1) << " " << completeString << " ";
|
||||
oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
|
||||
oss << std::setw(50) << std::left << data["title"].get<std::string>() << " ";
|
||||
return oss.str();
|
||||
}
|
||||
}
|
51
src/main/Result.h
Normal file
51
src/main/Result.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef RESULT_H
|
||||
#define RESULT_H
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "HyperParameters.h"
|
||||
#include "PartialResult.h"
|
||||
#include "Timer.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
|
||||
class Result {
|
||||
public:
|
||||
Result();
|
||||
Result& load(const std::string& path, const std::string& filename);
|
||||
void save();
|
||||
// Getters
|
||||
json getJson();
|
||||
std::string to_string(int maxModel) const;
|
||||
std::string getFilename() const;
|
||||
std::string getDate() const { return data["date"].get<std::string>(); };
|
||||
double getScore() const { return score; };
|
||||
std::string getTitle() const { return data["title"].get<std::string>(); };
|
||||
double getDuration() const { return data["duration"]; };
|
||||
std::string getModel() const { return data["model"].get<std::string>(); };
|
||||
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
|
||||
bool isComplete() const { return complete; };
|
||||
// Setters
|
||||
void setTitle(const std::string& title) { data["title"] = title; };
|
||||
void setLanguage(const std::string& language) { data["language"] = language; };
|
||||
void setLanguageVersion(const std::string& language_version) { data["language_version"] = language_version; };
|
||||
void setDuration(double duration) { data["duration"] = duration; };
|
||||
void setModel(const std::string& model) { data["model"] = model; };
|
||||
void setModelVersion(const std::string& model_version) { data["version"] = model_version; };
|
||||
void setScoreName(const std::string& scoreName) { data["score_name"] = scoreName; };
|
||||
void setDiscretized(bool discretized) { data["discretized"] = discretized; };
|
||||
void addSeed(int seed) { data["seeds"].push_back(seed); };
|
||||
void addPartial(PartialResult& partial_result) { data["results"].push_back(partial_result.getJson()); };
|
||||
void setStratified(bool stratified) { data["stratified"] = stratified; };
|
||||
void setNFolds(int nfolds) { data["folds"] = nfolds; };
|
||||
void setPlatform(const std::string& platform_name) { data["platform"] = platform_name; };
|
||||
|
||||
private:
|
||||
json data;
|
||||
bool complete;
|
||||
double score = 0.0;
|
||||
};
|
||||
};
|
||||
#endif
|
137
src/main/b_main.cc
Normal file
137
src/main/b_main.cc
Normal file
@@ -0,0 +1,137 @@
|
||||
#include <iostream>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "Experiment.h"
|
||||
#include "Datasets.h"
|
||||
#include "DotEnv.h"
|
||||
#include "Models.h"
|
||||
#include "modelRegister.h"
|
||||
#include "Paths.h"
|
||||
#include "config.h"
|
||||
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
/// Declares the command-line options of b_main on `program`.
///
/// Defaults for --discretize, --stratified, --folds and --seeds are read from
/// the DotEnv configuration ("discretize", "stratified", "n_folds" and the
/// seed list). --model is validated against the registered model names and
/// --folds must be an integer greater than 1.
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
    program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
    // The trailing space after "name." keeps the two sentences apart once the literals are concatenated
    program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name. " \
        "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->tostring())
        .action([](const std::string& value) {
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
                return value;
            }
            throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
            }
        );
    program.add_argument("--title").default_value("").help("Experiment title");
    program.add_argument("--discretize").help("Discretize input dataset").default_value(std::stoi(env.get("discretize")) != 0).implicit_value(true);
    program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(std::stoi(env.get("stratified")) != 0).implicit_value(true);
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int k = 0;
        try {
            k = std::stoi(value);
        }
        catch (...) {
            // std::stoi rejects non-numeric and out-of-range input
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (k < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return k;
        });
    auto seed_values = env.getSeeds();
    program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
argparse::ArgumentParser program("b_main", { project_version.begin(), project_version.end() });
|
||||
manageArguments(program);
|
||||
std::string file_name, model_name, title, hyperparameters_file;
|
||||
json hyperparameters_json;
|
||||
bool discretize_dataset, stratified, saveResults, quiet, no_train_score;
|
||||
std::vector<int> seeds;
|
||||
std::vector<std::string> filesToTest;
|
||||
int n_folds;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
file_name = program.get<std::string>("dataset");
|
||||
model_name = program.get<std::string>("model");
|
||||
discretize_dataset = program.get<bool>("discretize");
|
||||
stratified = program.get<bool>("stratified");
|
||||
quiet = program.get<bool>("quiet");
|
||||
n_folds = program.get<int>("folds");
|
||||
seeds = program.get<std::vector<int>>("seeds");
|
||||
auto hyperparameters = program.get<std::string>("hyperparameters");
|
||||
hyperparameters_json = json::parse(hyperparameters);
|
||||
hyperparameters_file = program.get<std::string>("hyper-file");
|
||||
no_train_score = program.get<bool>("no-train-score");
|
||||
if (hyperparameters_file != "" && hyperparameters != "{}") {
|
||||
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
|
||||
}
|
||||
title = program.get<std::string>("title");
|
||||
if (title == "" && file_name == "") {
|
||||
throw runtime_error("title is mandatory if dataset is not provided");
|
||||
}
|
||||
saveResults = program.get<bool>("save");
|
||||
}
|
||||
catch (const exception& err) {
|
||||
cerr << err.what() << std::endl;
|
||||
cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
|
||||
if (file_name != "") {
|
||||
if (!datasets.isDataset(file_name)) {
|
||||
cerr << "Dataset " << file_name << " not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
if (title == "") {
|
||||
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
|
||||
}
|
||||
filesToTest.push_back(file_name);
|
||||
} else {
|
||||
filesToTest = datasets.getNames();
|
||||
saveResults = true;
|
||||
}
|
||||
platform::HyperParameters test_hyperparams;
|
||||
if (hyperparameters_file != "") {
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
|
||||
} else {
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
|
||||
}
|
||||
|
||||
/*
|
||||
* Begin Processing
|
||||
*/
|
||||
auto env = platform::DotEnv();
|
||||
auto experiment = platform::Experiment();
|
||||
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
|
||||
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
|
||||
experiment.setHyperparameters(test_hyperparams);
|
||||
for (auto seed : seeds) {
|
||||
experiment.addRandomSeed(seed);
|
||||
}
|
||||
platform::Timer timer;
|
||||
timer.start();
|
||||
experiment.go(filesToTest, quiet, no_train_score);
|
||||
experiment.setDuration(timer.getDuration());
|
||||
if (saveResults) {
|
||||
experiment.saveResult();
|
||||
}
|
||||
if (!quiet)
|
||||
experiment.report();
|
||||
std::cout << "Done!" << std::endl;
|
||||
return 0;
|
||||
}
|
31
src/main/modelRegister.h
Normal file
31
src/main/modelRegister.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef MODEL_REGISTER_H
|
||||
#define MODEL_REGISTER_H
|
||||
static platform::Registrar registrarT("TAN",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
|
||||
static platform::Registrar registrarTLD("TANLd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
|
||||
static platform::Registrar registrarS("SPODE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
|
||||
static platform::Registrar registrarSLD("SPODELd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
|
||||
static platform::Registrar registrarK("KDB",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
|
||||
static platform::Registrar registrarKLD("KDBLd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
|
||||
static platform::Registrar registrarA("AODE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
|
||||
static platform::Registrar registrarALD("AODELd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
|
||||
static platform::Registrar registrarBA("BoostAODE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
|
||||
static platform::Registrar registrarSt("STree",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
|
||||
static platform::Registrar registrarOdte("Odte",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();});
|
||||
static platform::Registrar registrarSvc("SVC",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();});
|
||||
static platform::Registrar registrarRaF("RandomForest",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
|
||||
static platform::Registrar registrarXGB("XGBoost",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::XGBoost();});
|
||||
#endif
|
Reference in New Issue
Block a user