Add Experiment, Result and Timer classes

This commit is contained in:
Ricardo Montañana Gómez 2023-07-24 01:15:12 +02:00
parent 0c226371cc
commit c10ebca0e0
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
5 changed files with 106 additions and 56 deletions

16
.vscode/launch.json vendored
View File

@ -16,10 +16,18 @@
{
"type": "lldb",
"request": "launch",
"name": "aout",
"program": "${workspaceFolder}/a.out",
"args": [],
"cwd": "${workspaceFolder}"
"name": "experiment",
"program": "${workspaceFolder}/build/src/Platform/main",
"args": [
"-f",
"iris",
"-m",
"TAN",
"-p",
"../../../data/",
"--discretize"
],
"cwd": "${workspaceFolder}/build/src/Platform",
},
{
"name": "Build & debug active file",

View File

@ -12,16 +12,22 @@
#include "AODE.h"
#include "TAN.h"
#include "platformUtils.h"
#include "Result.h"
#include "Folding.h"
using namespace std;
pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
{
auto result = Result();
auto k = fold->getNumberOfFolds();
float accuracy = 0.0;
auto accuracy = torch::zeros({ k }, kFloat64);
auto train_time = torch::zeros({ k }, kFloat64);
auto test_time = torch::zeros({ k }, kFloat64);
Timer train_timer, test_timer;
for (int i = 0; i < k; i++) {
train_timer.start();
auto [train, test] = fold->getFold(i);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
@ -30,10 +36,15 @@ pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model,
auto X_test = X.index({ test_t });
auto y_test = y.index({ test_t });
model->fit(X_train, y_train, features, className, states);
train_time[i] = train_timer.getDuration();
test_timer.start();
auto acc = model->score(X_test, y_test);
accuracy += acc;
test_time[i] = test_timer.getDuration();
accuracy[i] = acc;
}
return { accuracy / k, 0 };
result.setScore(torch::mean(accuracy).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
return result;
}
int main(int argc, char** argv)
@ -96,25 +107,23 @@ int main(int argc, char** argv)
cerr << program;
exit(1);
}
/*
* Begin Processing
*/
auto [X, y, features, className] = loadDataset(file_name, discretize_dataset, class_last);
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto [Xd, maxes] = discretizeTorch(X, y, features);
states = get_states(Xd, y, features, className);
X = Xd;
}
auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset);
auto fold = StratifiedKFold(5, y, -1);
auto classifiers = map<string, bayesnet::BaseClassifier*>({
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
}
);
auto experiment = Experiment();
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
experiment.setStratified(true).setNFolds(5).addRandomSeed(271).setScoreName("accuracy");
bayesnet::BaseClassifier* model = classifiers[model_name];
auto results = cross_validation(&fold, model, X, y, features, className, states);
cout << "Accuracy: " << results.first << endl;
auto result = cross_validation(&fold, model, X, y, features, className, states);
result.setDataset(file_name);
experiment.addResult(result);
experiment.save(path);
return 0;
}

57
src/Platform/Result.h Normal file
View File

@ -0,0 +1,57 @@
#ifndef RESULT_H
#define RESULT_H
#include <chrono>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
/// Minimal stopwatch used to time the training and testing phases.
///
/// Both the stored start point and the "now" samples come from
/// std::chrono::steady_clock. Mixing a steady_clock time_point with
/// high_resolution_clock::now() only compiles on standard libraries where the
/// two clocks happen to be the same type, so a single clock is used throughout.
class Timer {
private:
    // Value-initialised to the clock's epoch until start() is called.
    std::chrono::steady_clock::time_point begin{};
public:
    Timer() = default;
    ~Timer() = default;
    /// Marks the current instant as the beginning of the measured interval.
    void start() { begin = std::chrono::steady_clock::now(); }
    /// Returns the time elapsed since the last start(), truncated to whole
    /// milliseconds and converted to float.
    float getDuration() const
    {
        const auto elapsed = std::chrono::steady_clock::now() - begin;
        return static_cast<float>(std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count());
    }
};
/// Holds the outcome of evaluating one model on one dataset.
///
/// Every setter returns *this so calls can be chained. All numeric members
/// carry default initialisers so that a plain `Result r;` never exposes
/// indeterminate values. The const getters let stored values be read back.
class Result {
private:
    std::string dataset;
    std::string hyperparameters;
    int samples{ 0 };
    int features{ 0 };
    int classes{ 0 };
    float score{ 0.0f };
    float score_std{ 0.0f };
    float train_time{ 0.0f };
    float train_time_std{ 0.0f };
    float test_time{ 0.0f };
    float test_time_std{ 0.0f };
public:
    Result() = default;

    // Fluent setters: string arguments are taken by value and moved into place.
    Result& setDataset(std::string dataset) { this->dataset = std::move(dataset); return *this; }
    Result& setHyperparameters(std::string hyperparameters) { this->hyperparameters = std::move(hyperparameters); return *this; }
    Result& setSamples(int samples) { this->samples = samples; return *this; }
    Result& setFeatures(int features) { this->features = features; return *this; }
    Result& setClasses(int classes) { this->classes = classes; return *this; }
    Result& setScore(float score) { this->score = score; return *this; }
    Result& setScoreStd(float score_std) { this->score_std = score_std; return *this; }
    Result& setTrainTime(float train_time) { this->train_time = train_time; return *this; }
    Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; }
    Result& setTestTime(float test_time) { this->test_time = test_time; return *this; }
    Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; }

    // Read-only accessors for the stored values.
    const std::string& getDataset() const { return dataset; }
    const std::string& getHyperparameters() const { return hyperparameters; }
    int getSamples() const { return samples; }
    int getFeatures() const { return features; }
    int getClasses() const { return classes; }
    float getScore() const { return score; }
    float getScoreStd() const { return score_std; }
    float getTrainTime() const { return train_time; }
    float getTrainTimeStd() const { return train_time_std; }
    float getTestTime() const { return test_time; }
    float getTestTimeStd() const { return test_time_std; }
};
class Experiment {
private:
string title, model, platform, score_name, model_version, language_version;
bool discretized, stratified;
vector<Result> results;
vector<int> random_seeds;
int nfolds;
public:
Experiment() = default;
Experiment& setTitle(string title) { this->title = title; return *this; }
Experiment& setModel(string model) { this->model = model; return *this; }
Experiment& setPlatform(string platform) { this->platform = platform; return *this; }
Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; }
void save(string path) { cout << "Saving experiment..." << endl; }
};
#endif

View File

@ -15,22 +15,6 @@ pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t
}
return { Xd, maxes };
}
// Discretizes every column of X with an MDLP discretizer fitted against y.
//
// X is indexed as (row, column): columns are taken with select(1, i) and the
// number of values per column is X.size(0). y supplies one label per row.
// `features` must provide a name for each column of X.
//
// Returns the discretized tensor (Int64, same shape as X) together with a map
// from feature name to (largest discretized value + 1) for that column.
pair<Tensor, map<string, int>> discretizeTorch(Tensor& X, Tensor& y, vector<string> features)
{
    map<string, int> maxes;
    auto fimdlp = mdlp::CPPFImdlp();
    auto Xd = torch::zeros_like(X, torch::kInt64);
    // data_ptr<int>() requires an Int32 tensor and the raw copy below requires
    // contiguous storage; convert up front so other label dtypes also work.
    auto y32 = y.to(torch::kInt32).contiguous();
    auto yv = vector<int>(y32.data_ptr<int>(), y32.data_ptr<int>() + y32.size(0));
    for (int i = 0; i < X.size(1); i++) {
        // select(1, i) yields a strided view of the column; reading size(0)
        // consecutive floats from its data pointer would walk across the
        // neighbouring columns instead. Materialise a contiguous Float32 copy
        // of the column before copying it into the vector.
        auto column = X.select(1, i).to(torch::kFloat32).contiguous();
        auto xv = vector<float>(column.data_ptr<float>(), column.data_ptr<float>() + column.size(0));
        fimdlp.fit(xv, yv);
        auto xdv = fimdlp.transform(xv);
        auto xd = torch::tensor(xdv, torch::kInt64);
        maxes[features[i]] = xd.max().item<int>() + 1;
        Xd.index_put_({ "...", i }, xd);
    }
    return { Xd, maxes };
}
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
@ -54,10 +38,10 @@ bool file_exists(const std::string& name)
}
}
tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last)
tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset)
{
auto handler = ArffFiles();
handler.load(PATH + static_cast<string>(name) + ".arff", class_last);
handler.load(path + static_cast<string>(name) + ".arff", class_last);
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
@ -68,32 +52,24 @@ tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool di
features.push_back(feature.first);
}
Tensor Xd;
if (discretize) {
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt64);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
} else {
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat64);
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ "...", i }, torch::tensor(X[i], torch::kFloat64));
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
}
}
return { Xd, torch::tensor(y, torch::kInt64), features, className };
}
map<string, vector<int>> get_states(Tensor& X, Tensor& y, vector<string> features, string className)
{
int max;
map<string, vector<int>> states;
for (int i = 0; i < X.size(1); i++) {
max = X.select(1, i).max().item<int>() + 1;
states[features[i]] = vector<int>(max);
}
max = y.max().item<int>() + 1;
states[className] = vector<int>(max);
return states;
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
}
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name)

View File

@ -12,8 +12,8 @@ const string PATH = "../../data/";
bool file_exists(const std::string& name);
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
pair<torch::Tensor, map<string, int>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string> features);
pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className);
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name);
tuple<torch::Tensor, torch::Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last);
map<string, vector<int>> get_states(torch::Tensor& X, torch::Tensor& y, vector<string> features, string className);
tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset);
map<string, vector<int>> get_states(vector<string>& features, string className, map<string, int>& maxes);
#endif //PLATFORM_UTILS_H