Begin b_list excel

This commit is contained in:
2024-02-29 12:53:11 +01:00
parent 9a26baec47
commit c69dc08134
58 changed files with 148 additions and 39 deletions

175
src/main/Experiment.cc Normal file
View File

@@ -0,0 +1,175 @@
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include "Paths.h"
namespace platform {
using json = nlohmann::json;
void Experiment::saveResult()
{
result.save();
}
void Experiment::report()
{
ReportConsole report(result.getJson());
report.show();
}
void Experiment::show()
{
std::cout << result.getJson().dump(4) << std::endl;
}
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score)
{
for (auto fileName : filesToProcess) {
if (fileName.size() > max_name)
max_name = fileName.size();
}
std::cout << Colors::MAGENTA() << "*** Starting experiment: " << result.getTitle() << " ***" << Colors::RESET() << std::endl << std::endl;
if (!quiet) {
std::cout << Colors::GREEN() << " Status Meaning" << std::endl;
std::cout << " ------ --------------------------------" << Colors::RESET() << std::endl;
std::cout << " ( " << Colors::GREEN() << "a" << Colors::RESET() << " ) Fitting model with train dataset" << std::endl;
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl;
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl;
}
int num = 0;
for (auto fileName : filesToProcess) {
if (!quiet)
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
cross_validation(fileName, quiet, no_train_score);
if (!quiet)
std::cout << std::endl;
}
if (!quiet)
std::cout << std::endl;
}
std::string getColor(bayesnet::status_t status)
{
switch (status) {
case bayesnet::NORMAL:
return Colors::GREEN();
case bayesnet::WARNING:
return Colors::YELLOW();
case bayesnet::ERROR:
return Colors::RED();
default:
return Colors::RESET();
}
}
void showProgress(int fold, const std::string& color, const std::string& phase)
{
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score)
{
auto datasets = Datasets(discretized, Paths::datasets());
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
auto features = datasets.getFeatures(fileName);
auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName);
if (!quiet) {
std::cout << " " << setw(5) << samples << " " << setw(5) << features.size() << flush;
}
// Prepare Result
auto partial_result = PartialResult();
auto [values, counts] = at::_unique(y);
partial_result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
partial_result.setHyperparameters(hyperparameters.get(fileName));
// Initialize results std::vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
auto edges = torch::zeros({ nResults }, torch::kFloat64);
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
std::vector<std::string> notes;
Timer train_timer, test_timer;
int item = 0;
bool first_seed = true;
for (auto seed : randomSeeds) {
if (!quiet) {
string prefix = " ";
if (!first_seed) {
prefix = "\n" + string(18 + max_name, ' ');
}
std::cout << prefix << setw(4) << right << seed << " " << flush;
first_seed = false;
}
folding::Fold* fold;
if (stratified)
fold = new folding::StratifiedKFold(nfolds, y, seed);
else
fold = new folding::KFold(nfolds, y.size(0), seed);
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(result.getModel());
setModelVersion(clf->getVersion());
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, fileName);
clf->setHyperparameters(hyperparameters.get(fileName));
// Split train - test dataset
train_timer.start();
auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model
clf->fit(X_train, y_train, features, className, states);
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
auto clf_notes = clf->getNotes();
std::transform(clf_notes.begin(), clf_notes.end(), std::back_inserter(notes), [nfold](const std::string& note)
{ return "Fold " + std::to_string(nfold) + ": " + note; });
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
double accuracy_train_value = 0.0;
// Score train
if (!no_train_score)
accuracy_train_value = clf->score(X_train, y_train);
// Test model
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
if (!quiet)
std::cout << "\b\b\b, " << flush;
// Store results and times in std::vector
partial_result.addScoreTrain(accuracy_train_value);
partial_result.addScoreTest(accuracy_test_value);
partial_result.addTimeTrain(train_time[item].item<double>());
partial_result.addTimeTest(test_time[item].item<double>());
item++;
}
if (!quiet)
std::cout << "end. " << flush;
delete fold;
}
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
partial_result.setDataset(fileName).setNotes(notes);
addResult(partial_result);
}
}

46
src/main/Experiment.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef EXPERIMENT_H
#define EXPERIMENT_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include "folding.hpp"
#include "BaseClassifier.h"
#include "HyperParameters.h"
#include "Result.h"
namespace platform {
using json = nlohmann::json;
class Experiment {
public:
Experiment() = default;
Experiment& setPlatform(const std::string& platform) { this->result.setPlatform(platform); return *this; }
Experiment& setScoreName(const std::string& score_name) { this->result.setScoreName(score_name); return *this; }
Experiment& setTitle(const std::string& title) { this->result.setTitle(title); return *this; }
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
Experiment& setLanguageVersion(const std::string& language_version) { this->result.setLanguageVersion(language_version); return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; result.setDiscretized(discretized); return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; result.setStratified(stratified); return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; result.setNFolds(nfolds); return *this; }
Experiment& addResult(PartialResult result_) { result.addPartial(result_); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score);
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score);
void saveResult();
void show();
void report();
private:
Result result;
bool discretized{ false }, stratified{ false };
std::vector<PartialResult> results;
std::vector<int> randomSeeds;
HyperParameters hyperparameters;
int nfolds{ 0 };
int max_name{ 7 }; // max length of dataset name for formatting (default 7)
};
}
#endif

View File

@@ -0,0 +1,56 @@
#include "HyperParameters.h"
#include <fstream>
#include <sstream>
#include <iostream>
namespace platform {
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
// Initialize all datasets with the given hyperparameters
for (const auto& item : datasets) {
hyperparameters[item] = hyperparameters_;
}
}
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
std::string join(std::vector<std::string> const& strings, std::string delim)
{
std::stringstream ss;
std::copy(strings.begin(), strings.end(),
std::ostream_iterator<std::string>(ss, delim.c_str()));
return ss.str();
}
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
// Check if file exists
std::ifstream file(hyperparameters_file);
if (!file.is_open()) {
throw std::runtime_error("File " + hyperparameters_file + " not found");
}
// Check if file is a json
json file_hyperparameters = json::parse(file);
auto input_hyperparameters = file_hyperparameters["results"];
// Check if hyperparameters are valid
for (const auto& dataset : datasets) {
if (!input_hyperparameters.contains(dataset)) {
std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl;
hyperparameters[dataset] = json({});
continue;
}
hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get<json>();
}
}
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
{
json result = hyperparameters.at(fileName);
for (const auto& item : result.items()) {
if (find(valid.begin(), valid.end(), item.key()) == valid.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: "
+ result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}");
}
}
}
json HyperParameters::get(const std::string& fileName)
{
return hyperparameters.at(fileName);
}
} /* namespace platform */

View File

@@ -0,0 +1,23 @@
#ifndef HYPERPARAMETERS_H
#define HYPERPARAMETERS_H
#include <string>
#include <map>
#include <vector>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
class HyperParameters {
public:
HyperParameters() = default;
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
~HyperParameters() = default;
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
void check(const std::vector<std::string>& valid, const std::string& fileName);
json get(const std::string& fileName);
private:
std::map<std::string, json> hyperparameters;
};
} /* namespace platform */
#endif /* HYPERPARAMETERS_H */

52
src/main/Models.cc Normal file
View File

@@ -0,0 +1,52 @@
#include "Models.h"
namespace platform {
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
Models* Models::factory = nullptr;
Models* Models::instance()
{
//manages singleton
if (factory == nullptr)
factory = new Models();
return factory;
}
void Models::registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
functionRegistry[name] = classFactoryFunction;
}
shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
{
bayesnet::BaseClassifier* instance = nullptr;
// find name in the registry and call factory method.
auto it = functionRegistry.find(name);
if (it != functionRegistry.end())
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return unique_ptr<bayesnet::BaseClassifier>(instance);
else
throw std::runtime_error("Model not found: " + name);
}
std::vector<std::string> Models::getNames()
{
std::vector<std::string> names;
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
[](const pair<std::string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
return names;
}
std::string Models::tostring()
{
std::string result = "";
for (const auto& pair : functionRegistry) {
result += pair.first + ", ";
}
return "{" + result.substr(0, result.size() - 2) + "}";
}
Registrar::Registrar(const std::string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
Models::instance()->registerFactoryFunction(name, classFactoryFunction);
}
}

42
src/main/Models.h Normal file
View File

@@ -0,0 +1,42 @@
#ifndef MODELS_H
#define MODELS_H
#include <map>
#include "BaseClassifier.h"
#include "AODE.h"
#include "TAN.h"
#include "KDB.h"
#include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "XGBoost.h"
#include "RandomForest.h"
namespace platform {
class Models {
private:
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton
Models() {};
public:
Models(Models&) = delete;
void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
static Models* instance();
shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
void registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
std::vector<string> getNames();
std::string tostring();
};
class Registrar {
public:
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
};
}
#endif

73
src/main/PartialResult.h Normal file
View File

@@ -0,0 +1,73 @@
#pragma once
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
class PartialResult {
public:
PartialResult()
{
data["scores_train"] = json::array();
data["scores_test"] = json::array();
data["times_train"] = json::array();
data["times_test"] = json::array();
data["notes"] = json::array();
data["train_time"] = 0.0;
data["train_time_std"] = 0.0;
data["test_time"] = 0.0;
data["test_time_std"] = 0.0;
};
PartialResult& setDataset(const std::string& dataset) { data["dataset"] = dataset; return *this; }
PartialResult& setNotes(const std::vector<std::string>& notes)
{
json notes_ = notes;
data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end());
return *this;
}
PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; }
PartialResult& setSamples(int samples) { data["samples"] = samples; return *this; }
PartialResult& setFeatures(int features) { data["features"] = features; return *this; }
PartialResult& setClasses(int classes) { data["classes"] = classes; return *this; }
PartialResult& setScoreTrain(double score) { data["score_train"] = score; return *this; }
PartialResult& setScoreTrainStd(double score_std) { data["score_train_std"] = score_std; return *this; }
PartialResult& setScoreTest(double score) { data["score"] = score; return *this; }
PartialResult& setScoreTestStd(double score_std) { data["score_std"] = score_std; return *this; }
PartialResult& setTrainTime(double train_time)
{
data["train_time"] = train_time;
data["time"] = data["test_time"].get<double>() + data["train_time"].get<double>();
return *this;
}
PartialResult& setTrainTimeStd(double train_time_std)
{
data["train_time_std"] = train_time_std;
data["time_std"] = data["test_time_std"].get<double>() + data["train_time_std"].get<double>();
return *this;
}
PartialResult& setTestTime(double test_time)
{
data["test_time"] = test_time;
data["time"] = data["test_time"].get<double>() + data["train_time"].get<double>();
return *this;
}
PartialResult& setTestTimeStd(double test_time_std)
{
data["test_time_std"] = test_time_std;
data["time_std"] = data["test_time_std"].get<double>() + data["train_time_std"].get<double>();
return *this;
}
PartialResult& setNodes(float nodes) { data["nodes"] = nodes; return *this; }
PartialResult& setLeaves(float leaves) { data["leaves"] = leaves; return *this; }
PartialResult& setDepth(float depth) { data["depth"] = depth; return *this; }
PartialResult& addScoreTrain(double score) { data["scores_train"].push_back(score); return *this; }
PartialResult& addScoreTest(double score) { data["scores_test"].push_back(score); return *this; }
PartialResult& addTimeTrain(double time) { data["times_train"].push_back(time); return *this; }
PartialResult& addTimeTest(double time) { data["times_test"].push_back(time); return *this; }
json getJson() const { return data; }
private:
json data;
};
}

98
src/main/Result.cc Normal file
View File

@@ -0,0 +1,98 @@
#include "Result.h"
#include <filesystem>
#include <fstream>
#include <sstream>
#include "BestScore.h"
#include "Colors.h"
#include "DotEnv.h"
#include "CLocale.h"
#include "Paths.h"
namespace platform {
std::string get_actual_date()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
std::string get_actual_time()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
Result::Result()
{
data["date"] = get_actual_date();
data["time"] = get_actual_time();
data["results"] = json::array();
data["seeds"] = json::array();
}
Result& Result::load(const std::string& path, const std::string& fileName)
{
std::ifstream resultData(path + "/" + fileName);
if (resultData.is_open()) {
data = json::parse(resultData);
} else {
throw std::invalid_argument("Unable to open result file. [" + path + "/" + fileName + "]");
}
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
auto scoreName = data["score_name"];
auto best = BestScore::getScore(scoreName);
if (best.first != "") {
score /= best.second;
}
complete = data["results"].size() > 1;
return *this;
}
json Result::getJson()
{
return data;
}
void Result::save()
{
std::ofstream file(Paths::results() + "/" + getFilename());
file << data;
file.close();
}
std::string Result::getFilename() const
{
std::ostringstream oss;
oss << "results_" << data.at("score_name").get<std::string>() << "_" << data.at("model").get<std::string>() << "_"
<< data.at("platform").get<std::string>() << "_" << data["date"].get<std::string>() << "_"
<< data["time"].get<std::string>() << "_" << (data["stratified"] ? "1" : "0") << ".json";
return oss.str();
}
std::string Result::to_string(int maxModel) const
{
auto tmp = ConfigLocale();
std::stringstream oss;
auto duration = data["duration"].get<double>();
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
oss << data["date"].get<std::string>() << " ";
oss << std::setw(maxModel) << std::left << data["model"].get<std::string>() << " ";
oss << std::setw(11) << std::left << data["score_name"].get<std::string>() << " ";
oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << std::setw(1) << " " << completeString << " ";
oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
oss << std::setw(50) << std::left << data["title"].get<std::string>() << " ";
return oss.str();
}
}

51
src/main/Result.h Normal file
View File

@@ -0,0 +1,51 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "HyperParameters.h"
#include "PartialResult.h"
#include "Timer.h"
namespace platform {
using json = nlohmann::json;
class Result {
public:
Result();
Result& load(const std::string& path, const std::string& filename);
void save();
// Getters
json getJson();
std::string to_string(int maxModel) const;
std::string getFilename() const;
std::string getDate() const { return data["date"].get<std::string>(); };
double getScore() const { return score; };
std::string getTitle() const { return data["title"].get<std::string>(); };
double getDuration() const { return data["duration"]; };
std::string getModel() const { return data["model"].get<std::string>(); };
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
bool isComplete() const { return complete; };
// Setters
void setTitle(const std::string& title) { data["title"] = title; };
void setLanguage(const std::string& language) { data["language"] = language; };
void setLanguageVersion(const std::string& language_version) { data["language_version"] = language_version; };
void setDuration(double duration) { data["duration"] = duration; };
void setModel(const std::string& model) { data["model"] = model; };
void setModelVersion(const std::string& model_version) { data["version"] = model_version; };
void setScoreName(const std::string& scoreName) { data["score_name"] = scoreName; };
void setDiscretized(bool discretized) { data["discretized"] = discretized; };
void addSeed(int seed) { data["seeds"].push_back(seed); };
void addPartial(PartialResult& partial_result) { data["results"].push_back(partial_result.getJson()); };
void setStratified(bool stratified) { data["stratified"] = stratified; };
void setNFolds(int nfolds) { data["folds"] = nfolds; };
void setPlatform(const std::string& platform_name) { data["platform"] = platform_name; };
private:
json data;
bool complete;
double score = 0.0;
};
};
#endif

137
src/main/b_main.cc Normal file
View File

@@ -0,0 +1,137 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "Experiment.h"
#include "Datasets.h"
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
#include "Paths.h"
#include "config.h"
using json = nlohmann::json;
void manageArguments(argparse::ArgumentParser& program)
{
auto env = platform::DotEnv();
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \
"Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) {
static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
}
);
program.add_argument("--title").default_value("").help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw std::runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw std::runtime_error(err.what());
}
catch (...) {
throw std::runtime_error("Number of folds must be an integer");
}});
auto seed_values = env.getSeeds();
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
int main(int argc, char** argv)
{
argparse::ArgumentParser program("b_main", { project_version.begin(), project_version.end() });
manageArguments(program);
std::string file_name, model_name, title, hyperparameters_file;
json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet, no_train_score;
std::vector<int> seeds;
std::vector<std::string> filesToTest;
int n_folds;
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize");
stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet");
n_folds = program.get<int>("folds");
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file");
no_train_score = program.get<bool>("no-train-score");
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
title = program.get<std::string>("title");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
}
saveResults = program.get<bool>("save");
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
if (file_name != "") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
/*
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
platform::Timer timer;
timer.start();
experiment.go(filesToTest, quiet, no_train_score);
experiment.setDuration(timer.getDuration());
if (saveResults) {
experiment.saveResult();
}
if (!quiet)
experiment.report();
std::cout << "Done!" << std::endl;
return 0;
}

31
src/main/modelRegister.h Normal file
View File

@@ -0,0 +1,31 @@
#ifndef MODEL_REGISTER_H
#define MODEL_REGISTER_H
static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
static platform::Registrar registrarSt("STree",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
static platform::Registrar registrarOdte("Odte",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();});
static platform::Registrar registrarSvc("SVC",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();});
static platform::Registrar registrarRaF("RandomForest",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
static platform::Registrar registrarXGB("XGBoost",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::XGBoost();});
#endif