Complete json output compatible with benchmark

This commit is contained in:
Ricardo Montañana Gómez 2023-07-26 19:01:39 +02:00
parent 6f7fb290b0
commit 3e954ba841
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
8 changed files with 88 additions and 16 deletions

View File

@ -11,6 +11,9 @@ namespace bayesnet {
vector<int> virtual predict(vector<vector<int>>& X) = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
// Structure metrics every classifier must report.
// NOTE(review): exact semantics are defined by the implementations; presumably the node, edge and
// state counts of the fitted network(s) — confirm against the concrete classes.
int virtual getNumberOfNodes() = 0;
int virtual getNumberOfEdges() = 0;
int virtual getNumberOfStates() = 0;
vector<string> virtual show() = 0;
vector<string> virtual graph(string title = "") = 0;
virtual ~BaseClassifier() = default;

View File

@ -141,4 +141,8 @@ namespace bayesnet {
{
return fitted ? model.getEdges().size() : 0;
}
// Number of states reported by the underlying model; 0 while the classifier has not been fitted.
int Classifier::getNumberOfStates()
{
    if (!fitted) {
        return 0;
    }
    return model.getStates();
}
}

View File

@ -32,8 +32,9 @@ namespace bayesnet {
Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
void addNodes();
int getNumberOfNodes();
int getNumberOfEdges();
// Overrides of the base-class structure metrics.
// The visible definition of getNumberOfStates() returns model.getStates() once fitted and 0 otherwise.
int getNumberOfNodes() override;
int getNumberOfEdges() override;
int getNumberOfStates() override;
Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;

View File

@ -130,4 +130,28 @@ namespace bayesnet {
}
return result;
}
int Ensemble::getNumberOfNodes()
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
nodes += models[i]->getNumberOfNodes();
}
return nodes;
}
// Total edge count of the ensemble: the sum of getNumberOfEdges() over the first n_models entries of models.
int Ensemble::getNumberOfEdges()
{
    int total = 0;
    int idx = 0;
    while (idx < n_models) {
        total += models[idx]->getNumberOfEdges();
        ++idx;
    }
    return total;
}
// Total state count of the ensemble: the sum of getNumberOfStates() over the first n_models entries of models.
int Ensemble::getNumberOfStates()
{
    int total = 0;
    int idx = 0;
    while (idx < n_models) {
        total += models[idx]->getNumberOfStates();
        ++idx;
    }
    return total;
}
}

View File

@ -36,6 +36,9 @@ namespace bayesnet {
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
// Overrides of the structure metrics.
// NOTE(review): these appear to be the declarations for the Ensemble::getNumberOf* definitions, which
// sum the corresponding value over the ensemble's models — confirm.
int getNumberOfNodes() override;
int getNumberOfEdges() override;
int getNumberOfStates() override;
vector<string> show() override;
vector<string> graph(string title) override;
};

View File

@ -2,36 +2,48 @@
namespace platform {
using json = nlohmann::json;
string get_date_time()
string get_date()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%Y-%m-%d_%H:%M:%S");
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
// Current local wall-clock time formatted as "HH:MM:SS".
string get_time()
{
    time_t now = time(nullptr);
    std::ostringstream buffer;
    buffer << std::put_time(std::localtime(&now), "%H:%M:%S");
    return buffer.str();
}
string Experiment::get_file_name()
{
string date_time = get_date_time();
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + date_time + "_" + (stratified ? "1" : "0") + ".json";
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
return result;
}
json Experiment::build_json()
{
json result;
result["title"] = title;
result["date"] = get_date();
result["time"] = get_time();
result["model"] = model;
result["version"] = model_version;
result["platform"] = platform;
result["score_name"] = score_name;
result["model_version"] = model_version;
result["language"] = language;
result["language_version"] = language_version;
result["discretized"] = discretized;
result["stratified"] = stratified;
result["nfolds"] = nfolds;
result["random_seeds"] = random_seeds;
result["folds"] = nfolds;
result["seeds"] = random_seeds;
result["duration"] = duration;
result["results"] = json::array();
for (auto& r : results) {
@ -43,12 +55,19 @@ namespace platform {
j["classes"] = r.getClasses();
j["score_train"] = r.getScoreTrain();
j["score_test"] = r.getScoreTest();
j["score"] = r.getScoreTest();
j["score_std"] = r.getScoreTestStd();
j["score_train_std"] = r.getScoreTrainStd();
j["score_test_std"] = r.getScoreTestStd();
j["train_time"] = r.getTrainTime();
j["train_time_std"] = r.getTrainTimeStd();
j["test_time"] = r.getTestTime();
j["test_time_std"] = r.getTestTimeStd();
j["time"] = r.getTestTime() + r.getTrainTime();
j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd();
j["nodes"] = r.getNodes();
j["leaves"] = r.getLeaves();
j["depth"] = r.getDepth();
result["results"].push_back(j);
}
return result;
@ -69,11 +88,16 @@ namespace platform {
);
auto Xt = torch::transpose(X, 0, 1);
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(0)).setFeatures(X.size(1)).setClasses(values.size(0));
auto k = fold->getNumberOfFolds();
auto accuracy_test = torch::zeros({ k }, torch::kFloat64);
auto accuracy_train = torch::zeros({ k }, torch::kFloat64);
auto train_time = torch::zeros({ k }, torch::kFloat64);
auto test_time = torch::zeros({ k }, torch::kFloat64);
auto nodes = torch::zeros({ k }, torch::kFloat64);
auto edges = torch::zeros({ k }, torch::kFloat64);
auto num_states = torch::zeros({ k }, torch::kFloat64);
Timer train_timer, test_timer;
for (int i = 0; i < k; i++) {
bayesnet::BaseClassifier* model = classifiers[model_name];
@ -86,6 +110,9 @@ namespace platform {
auto X_test = Xt.index({ "...", test_t });
auto y_test = y.index({ test_t });
model->fit(X_train, y_train, features, className, states);
nodes[i] = model->getNumberOfNodes();
edges[i] = model->getNumberOfEdges();
num_states[i] = model->getNumberOfStates();
cout << "Training Fold " << i + 1 << endl;
cout << "X_train: " << X_train.sizes() << endl;
cout << "y_train: " << y_train.sizes() << endl;
@ -102,6 +129,7 @@ namespace platform {
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
return result;
}
}

View File

@ -20,13 +20,14 @@ namespace platform {
Timer() = default;
~Timer() = default;
// Stores the current high-resolution clock reading in begin.
void start()
{
    begin = chrono::high_resolution_clock::now();
}
float getDuration() { return chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - begin).count(); }
// Elapsed time, in seconds, between the value stored in begin and now.
// count() is an integer number of milliseconds, so the divisor must be floating point: dividing by the
// integer 1000 truncated the result to whole seconds (any run shorter than 1 s reported 0).
float getDuration() { return chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - begin).count() / 1000.0f; }
};
class Result {
private:
string dataset, hyperparameters;
int samples, features, classes;
float score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std;
float nodes, leaves, depth;
public:
Result() = default;
Result& setDataset(string dataset) { this->dataset = dataset; return *this; }
@ -42,6 +43,9 @@ namespace platform {
Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; }
Result& setTestTime(float test_time) { this->test_time = test_time; return *this; }
Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; }
// Stores the node metric and returns *this so setter calls can be chained.
Result& setNodes(float nodes)
{
    this->nodes = nodes;
    return *this;
}
// Stores the leaves metric and returns *this so setter calls can be chained.
Result& setLeaves(float leaves)
{
    this->leaves = leaves;
    return *this;
}
// Stores the depth metric and returns *this so setter calls can be chained.
Result& setDepth(float depth)
{
    this->depth = depth;
    return *this;
}
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; }
@ -57,10 +61,13 @@ namespace platform {
const float getTrainTimeStd() const { return train_time_std; }
const float getTestTime() const { return test_time; }
const float getTestTimeStd() const { return test_time_std; }
const float getNodes() const { return nodes; }
const float getLeaves() const { return leaves; }
const float getDepth() const { return depth; }
};
class Experiment {
private:
string title, model, platform, score_name, model_version, language_version;
string title, model, platform, score_name, model_version, language_version, language;
bool discretized, stratified;
vector<Result> results;
vector<int> random_seeds;
@ -74,6 +81,7 @@ namespace platform {
Experiment& setPlatform(string platform) { this->platform = platform; return *this; }
Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(string language) { this->language = language; return *this; }
Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }

View File

@ -18,8 +18,6 @@
using namespace std;
int main(int argc, char** argv)
{
map<string, bool> datasets = {
@ -60,6 +58,7 @@ int main(int argc, char** argv)
throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}");
}
);
program.add_argument("--title").required().help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
@ -79,7 +78,7 @@ int main(int argc, char** argv)
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, discretize_dataset, stratified;
int n_folds, seed;
string model_name, file_name, path, complete_file_name;
string model_name, file_name, path, complete_file_name, title;
try {
program.parse_args(argc, argv);
file_name = program.get<string>("dataset");
@ -91,6 +90,7 @@ int main(int argc, char** argv)
seed = program.get<int>("seed");
complete_file_name = path + file_name + ".arff";
class_last = datasets[file_name];
title = program.get<string>("title");
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
@ -110,7 +110,8 @@ int main(int argc, char** argv)
else
fold = new KFold(n_folds, y.numel(), seed);
auto experiment = platform::Experiment();
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setModelVersion("1...0").setPlatform("BayesNet");
experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy");
platform::Timer timer;
timer.start();