Add Experiment, Result and Timer classes
This commit is contained in:
parent
0c226371cc
commit
c10ebca0e0
16
.vscode/launch.json
vendored
16
.vscode/launch.json
vendored
@ -16,10 +16,18 @@
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "aout",
|
||||
"program": "${workspaceFolder}/a.out",
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}"
|
||||
"name": "experiment",
|
||||
"program": "${workspaceFolder}/build/src/Platform/main",
|
||||
"args": [
|
||||
"-f",
|
||||
"iris",
|
||||
"-m",
|
||||
"TAN",
|
||||
"-p",
|
||||
"../../../data/",
|
||||
"--discretize"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build/src/Platform",
|
||||
},
|
||||
{
|
||||
"name": "Build & debug active file",
|
||||
|
@ -12,16 +12,22 @@
|
||||
#include "AODE.h"
|
||||
#include "TAN.h"
|
||||
#include "platformUtils.h"
|
||||
#include "Result.h"
|
||||
#include "Folding.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
|
||||
Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
|
||||
{
|
||||
auto result = Result();
|
||||
auto k = fold->getNumberOfFolds();
|
||||
float accuracy = 0.0;
|
||||
auto accuracy = torch::zeros({ k }, kFloat64);
|
||||
auto train_time = torch::zeros({ k }, kFloat64);
|
||||
auto test_time = torch::zeros({ k }, kFloat64);
|
||||
Timer train_timer, test_timer;
|
||||
for (int i = 0; i < k; i++) {
|
||||
train_timer.start();
|
||||
auto [train, test] = fold->getFold(i);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
@ -30,10 +36,15 @@ pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model,
|
||||
auto X_test = X.index({ test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
model->fit(X_train, y_train, features, className, states);
|
||||
train_time[i] = train_timer.getDuration();
|
||||
test_timer.start();
|
||||
auto acc = model->score(X_test, y_test);
|
||||
accuracy += acc;
|
||||
test_time[i] = test_timer.getDuration();
|
||||
accuracy[i] = acc;
|
||||
}
|
||||
return { accuracy / k, 0 };
|
||||
result.setScore(torch::mean(accuracy).item<double>());
|
||||
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||
return result;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
@ -96,25 +107,23 @@ int main(int argc, char** argv)
|
||||
cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Begin Processing
|
||||
*/
|
||||
auto [X, y, features, className] = loadDataset(file_name, discretize_dataset, class_last);
|
||||
auto states = map<string, vector<int>>();
|
||||
if (discretize_dataset) {
|
||||
auto [Xd, maxes] = discretizeTorch(X, y, features);
|
||||
states = get_states(Xd, y, features, className);
|
||||
X = Xd;
|
||||
}
|
||||
auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset);
|
||||
auto fold = StratifiedKFold(5, y, -1);
|
||||
auto classifiers = map<string, bayesnet::BaseClassifier*>({
|
||||
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
|
||||
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
|
||||
}
|
||||
);
|
||||
auto experiment = Experiment();
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
|
||||
experiment.setStratified(true).setNFolds(5).addRandomSeed(271).setScoreName("accuracy");
|
||||
bayesnet::BaseClassifier* model = classifiers[model_name];
|
||||
auto results = cross_validation(&fold, model, X, y, features, className, states);
|
||||
cout << "Accuracy: " << results.first << endl;
|
||||
auto result = cross_validation(&fold, model, X, y, features, className, states);
|
||||
result.setDataset(file_name);
|
||||
experiment.addResult(result);
|
||||
experiment.save(path);
|
||||
return 0;
|
||||
}
|
||||
|
57
src/Platform/Result.h
Normal file
57
src/Platform/Result.h
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef RESULT_H
|
||||
#define RESULT_H
|
||||
#include <chrono>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
/**
 * Minimal stopwatch.
 *
 * start() records the current instant; getDuration() reports the number of
 * whole milliseconds elapsed since the most recent start().
 */
class Timer {
private:
    // The stored instant and every reading must come from the same clock:
    // high_resolution_clock is allowed to alias system_clock, in which case its
    // time_point cannot be assigned to a steady_clock time_point.
    std::chrono::time_point<std::chrono::steady_clock> begin{};
public:
    Timer() = default;
    ~Timer() = default;

    /// Marks the current instant as the start of the measured interval.
    void start() { begin = std::chrono::steady_clock::now(); }

    /// Returns the milliseconds elapsed since start(), truncated to a whole count.
    float getDuration() const
    {
        const auto elapsed = std::chrono::steady_clock::now() - begin;
        return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
    }
};
|
||||
/**
 * Value holder for the outcome of evaluating one dataset.
 *
 * Every setter stores its argument and returns *this so calls can be chained.
 * All numeric fields start at zero and both strings start empty, so a
 * default-constructed Result never carries indeterminate values.
 */
class Result {
private:
    std::string dataset, hyperparameters;
    int samples{ 0 }, features{ 0 }, classes{ 0 };
    float score{ 0 }, score_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
public:
    Result() = default;
    // String arguments are taken by value and moved into place to avoid a second copy.
    Result& setDataset(std::string dataset) { this->dataset = std::move(dataset); return *this; }
    Result& setHyperparameters(std::string hyperparameters) { this->hyperparameters = std::move(hyperparameters); return *this; }
    Result& setSamples(int samples) { this->samples = samples; return *this; }
    Result& setFeatures(int features) { this->features = features; return *this; }
    Result& setClasses(int classes) { this->classes = classes; return *this; }
    Result& setScore(float score) { this->score = score; return *this; }
    Result& setScoreStd(float score_std) { this->score_std = score_std; return *this; }
    Result& setTrainTime(float train_time) { this->train_time = train_time; return *this; }
    Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; }
    Result& setTestTime(float test_time) { this->test_time = test_time; return *this; }
    Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; }
};
|
||||
class Experiment {
|
||||
private:
|
||||
string title, model, platform, score_name, model_version, language_version;
|
||||
bool discretized, stratified;
|
||||
vector<Result> results;
|
||||
vector<int> random_seeds;
|
||||
int nfolds;
|
||||
public:
|
||||
Experiment() = default;
|
||||
Experiment& setTitle(string title) { this->title = title; return *this; }
|
||||
Experiment& setModel(string model) { this->model = model; return *this; }
|
||||
Experiment& setPlatform(string platform) { this->platform = platform; return *this; }
|
||||
Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; }
|
||||
Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; }
|
||||
Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; }
|
||||
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
|
||||
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
|
||||
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
|
||||
Experiment& addResult(Result result) { results.push_back(result); return *this; }
|
||||
Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; }
|
||||
void save(string path) { cout << "Saving experiment..." << endl; }
|
||||
};
|
||||
#endif
|
@ -15,22 +15,6 @@ pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t
|
||||
}
|
||||
return { Xd, maxes };
|
||||
}
|
||||
/**
 * Discretizes each column of X independently with an MDLP fit against y.
 *
 * @param X        Tensor with at least two dimensions; column i along
 *                 dimension 1 is treated as the values of features[i].
 * @param y        Class labels; its first y.size(0) elements are used
 *                 (presumably one label per row of X — confirm with callers).
 * @param features Feature names, indexed like the columns of X.
 * @return Pair of (kInt64 tensor shaped like X holding the discretized values,
 *         map from feature name to its largest discretized value plus one).
 */
pair<Tensor, map<string, int>> discretizeTorch(Tensor& X, Tensor& y, vector<string> features)
{
    map<string, int> maxes;
    auto fimdlp = mdlp::CPPFImdlp();
    auto Xd = torch::zeros_like(X, torch::kInt64);
    // data_ptr<T>() requires the tensor dtype to be T and only walks the
    // elements in order when the tensor is contiguous, so normalise both first.
    auto y_flat = y.to(torch::kInt32).contiguous();
    auto yv = vector<int>(y_flat.data_ptr<int>(), y_flat.data_ptr<int>() + y_flat.size(0));
    for (int i = 0; i < X.size(1); i++) {
        // select(1, i) yields a strided view of the column; reading its raw
        // pointer linearly would walk along the row instead, so copy the column
        // into contiguous float storage before building the sample vector.
        auto column = X.select(1, i).to(torch::kFloat32).contiguous();
        auto xv = vector<float>(column.data_ptr<float>(), column.data_ptr<float>() + column.size(0));
        fimdlp.fit(xv, yv);
        auto xdv = fimdlp.transform(xv);
        auto xd = torch::tensor(xdv, torch::kInt64);
        maxes[features[i]] = xd.max().item<int>() + 1;
        Xd.index_put_({ "...", i }, xd);
    }
    return { Xd, maxes };
}
|
||||
|
||||
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
{
|
||||
@ -54,10 +38,10 @@ bool file_exists(const std::string& name)
|
||||
}
|
||||
}
|
||||
|
||||
tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last)
|
||||
tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset)
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
handler.load(PATH + static_cast<string>(name) + ".arff", class_last);
|
||||
handler.load(path + static_cast<string>(name) + ".arff", class_last);
|
||||
// Get Dataset X, y
|
||||
vector<mdlp::samples_t>& X = handler.getX();
|
||||
mdlp::labels_t& y = handler.getY();
|
||||
@ -68,32 +52,24 @@ tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool di
|
||||
features.push_back(feature.first);
|
||||
}
|
||||
Tensor Xd;
|
||||
if (discretize) {
|
||||
auto states = map<string, vector<int>>();
|
||||
if (discretize_dataset) {
|
||||
auto Xr = discretizeDataset(X, y);
|
||||
Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt64);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
iota(begin(states[features[i]]), end(states[features[i]]), 0);
|
||||
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64));
|
||||
}
|
||||
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
iota(begin(states[className]), end(states[className]), 0);
|
||||
} else {
|
||||
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat64);
|
||||
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xd.index_put_({ "...", i }, torch::tensor(X[i], torch::kFloat64));
|
||||
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
|
||||
}
|
||||
}
|
||||
return { Xd, torch::tensor(y, torch::kInt64), features, className };
|
||||
}
|
||||
|
||||
map<string, vector<int>> get_states(Tensor& X, Tensor& y, vector<string> features, string className)
|
||||
{
|
||||
int max;
|
||||
map<string, vector<int>> states;
|
||||
for (int i = 0; i < X.size(1); i++) {
|
||||
max = X.select(1, i).max().item<int>() + 1;
|
||||
states[features[i]] = vector<int>(max);
|
||||
}
|
||||
max = y.max().item<int>() + 1;
|
||||
states[className] = vector<int>(max);
|
||||
return states;
|
||||
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
|
||||
}
|
||||
|
||||
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name)
|
||||
|
@ -12,8 +12,8 @@ const string PATH = "../../data/";
|
||||
|
||||
bool file_exists(const std::string& name);
|
||||
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
|
||||
pair<torch::Tensor, map<string, int>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string> features);
|
||||
pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className);
|
||||
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name);
|
||||
tuple<torch::Tensor, torch::Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last);
|
||||
map<string, vector<int>> get_states(torch::Tensor& X, torch::Tensor& y, vector<string> features, string className);
|
||||
tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset);
|
||||
map<string, vector<int>> get_states(vector<string>& features, string className, map<string, int>& maxes);
|
||||
#endif //PLATFORM_UTILS_H
|
||||
|
Loading…
Reference in New Issue
Block a user