diff --git a/.vscode/launch.json b/.vscode/launch.json index fde5435..415f773 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -16,10 +16,18 @@ { "type": "lldb", "request": "launch", - "name": "aout", - "program": "${workspaceFolder}/a.out", - "args": [], - "cwd": "${workspaceFolder}" + "name": "experiment", + "program": "${workspaceFolder}/build/src/Platform/main", + "args": [ + "-f", + "iris", + "-m", + "TAN", + "-p", + "../../../data/", + "--discretize" + ], + "cwd": "${workspaceFolder}/build/src/Platform", }, { "name": "Build & debug active file", diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index f011f40..27a2ef7 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -12,16 +12,22 @@ #include "AODE.h" #include "TAN.h" #include "platformUtils.h" +#include "Result.h" #include "Folding.h" using namespace std; -pair cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector features, string className, map> states) +Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector features, string className, map> states) { + auto result = Result(); auto k = fold->getNumberOfFolds(); - float accuracy = 0.0; + auto accuracy = torch::zeros({ k }, kFloat64); + auto train_time = torch::zeros({ k }, kFloat64); + auto test_time = torch::zeros({ k }, kFloat64); + Timer train_timer, test_timer; for (int i = 0; i < k; i++) { + train_timer.start(); auto [train, test] = fold->getFold(i); auto train_t = torch::tensor(train); auto test_t = torch::tensor(test); @@ -30,10 +36,15 @@ pair cross_validation(Fold* fold, bayesnet::BaseClassifier* model, auto X_test = X.index({ test_t }); auto y_test = y.index({ test_t }); model->fit(X_train, y_train, features, className, states); + train_time[i] = train_timer.getDuration(); + test_timer.start(); auto acc = model->score(X_test, y_test); - accuracy += acc; + test_time[i] = test_timer.getDuration(); + accuracy[i] = acc; } - return { accuracy / k, 0 }; + result.setScore(torch::mean(accuracy).item()); + result.setTrainTime(torch::mean(train_time).item()).setTestTime(torch::mean(test_time).item()); + return result; } int main(int argc, char** argv) @@ -96,25 +107,23 @@ int main(int argc, char** argv) cerr << program; exit(1); } - /* * Begin Processing */ - auto [X, y, features, className] = loadDataset(file_name, discretize_dataset, class_last); - auto states = map>(); - if (discretize_dataset) { - auto [Xd, maxes] = discretizeTorch(X, y, features); - states = get_states(Xd, y, features, className); - X = Xd; - } + auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset); auto fold = StratifiedKFold(5, y, -1); auto classifiers = map({ { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } } ); + auto experiment = Experiment(); + experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp"); + experiment.setStratified(true).setNFolds(5).addRandomSeed(271).setScoreName("accuracy"); bayesnet::BaseClassifier* model = classifiers[model_name]; - auto results = cross_validation(&fold, model, X, y, features, className, states); - cout << "Accuracy: " << results.first << endl; + auto result = cross_validation(&fold, model, X, y, features, className, states); + result.setDataset(file_name); + experiment.addResult(result); + experiment.save(path); return 0; } diff --git a/src/Platform/Result.h b/src/Platform/Result.h new file mode 100644 index 0000000..a82910d --- /dev/null +++ b/src/Platform/Result.h @@ -0,0 +1,57 @@ +#ifndef RESULT_H +#define RESULT_H +#include +#include + +using namespace std; +class Timer { +private: + chrono::time_point begin; +public: + Timer() = default; + ~Timer() = default; + void start() { begin = chrono::high_resolution_clock::now(); } + float getDuration() { return chrono::duration_cast(chrono::high_resolution_clock::now() - begin).count(); } +}; +class Result { +private: + string dataset, hyperparameters; + int samples, features, classes; + float score, score_std, train_time, train_time_std, test_time, test_time_std; +public: + Result() = default; + Result& setDataset(string dataset) { this->dataset = dataset; return *this; } + Result& setHyperparameters(string hyperparameters) { this->hyperparameters = hyperparameters; return *this; } + Result& setSamples(int samples) { this->samples = samples; return *this; } + Result& setFeatures(int features) { this->features = features; return *this; } + Result& setClasses(int classes) { this->classes = classes; return *this; } + Result& setScore(float score) { this->score = score; return *this; } + Result& setScoreStd(float score_std) { this->score_std = score_std; return *this; } + Result& setTrainTime(float train_time) { this->train_time = train_time; return *this; } + Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; } + Result& setTestTime(float test_time) { this->test_time = test_time; return *this; } + Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; } +}; +class Experiment { +private: + string title, model, platform, score_name, model_version, language_version; + bool discretized, stratified; + vector results; + vector random_seeds; + int nfolds; +public: + Experiment() = default; + Experiment& setTitle(string title) { this->title = title; return *this; } + Experiment& setModel(string model) { this->model = model; return *this; } + Experiment& setPlatform(string platform) { this->platform = platform; return *this; } + Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; } + Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; } + Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; } + Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } + Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } + Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } + Experiment& addResult(Result result) { results.push_back(result); return *this; } + Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; } + void save(string path) { cout << "Saving experiment..." << endl; } +}; +#endif \ No newline at end of file diff --git a/src/Platform/platformUtils.cc b/src/Platform/platformUtils.cc index 6f64858..4c383ec 100644 --- a/src/Platform/platformUtils.cc +++ b/src/Platform/platformUtils.cc @@ -15,22 +15,6 @@ pair, map> discretize(vector> discretizeTorch(Tensor& X, Tensor& y, vector features) -{ - map maxes; - auto fimdlp = mdlp::CPPFImdlp(); - auto Xd = torch::zeros_like(X, torch::kInt64); - auto yv = vector(y.data_ptr(), y.data_ptr() + y.size(0)); - for (int i = 0; i < X.size(1); i++) { - auto xv = vector(X.select(1, i).data_ptr(), X.select(1, i).data_ptr() + X.size(0)); - fimdlp.fit(xv, yv); - auto xdv = fimdlp.transform(xv); - auto xd = torch::tensor(xdv, torch::kInt64); - maxes[features[i]] = xd.max().item() + 1; - Xd.index_put_({ "...", i }, xd); - } - return { Xd, maxes }; -} vector discretizeDataset(vector& X, mdlp::labels_t& y) { @@ -54,10 +38,10 @@ bool file_exists(const std::string& name) } } -tuple < Tensor, Tensor, vector, string> loadDataset(string name, bool discretize, bool class_last) +tuple, string, map>> loadDataset(string path, string name, bool class_last, bool discretize_dataset) { auto handler = ArffFiles(); - handler.load(PATH + static_cast(name) + ".arff", class_last); + handler.load(path + static_cast(name) + ".arff", class_last); // Get Dataset X, y vector& X = handler.getX(); mdlp::labels_t& y = handler.getY(); @@ -68,32 +52,24 @@ tuple < Tensor, Tensor, vector, string> loadDataset(string name, bool di features.push_back(feature.first); } Tensor Xd; - if (discretize) { + auto states = map>(); + if (discretize_dataset) { auto Xr = discretizeDataset(X, y); Xd = torch::zeros({ static_cast(Xr[0].size()), static_cast(Xr.size()) }, torch::kInt64); for (int i = 0; i < features.size(); ++i) { + states[features[i]] = vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); + iota(begin(states[features[i]]), end(states[features[i]]), 0); Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64)); } + states[className] = vector(*max_element(y.begin(), y.end()) + 1); + iota(begin(states[className]), end(states[className]), 0); } else { - Xd = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, torch::kFloat64); + Xd = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, torch::kFloat32); for (int i = 0; i < features.size(); ++i) { - Xd.index_put_({ "...", i }, torch::tensor(X[i], torch::kFloat64)); + Xd.index_put_({ "...", i }, torch::tensor(X[i])); } } - return { Xd, torch::tensor(y, torch::kInt64), features, className }; -} - -map> get_states(Tensor& X, Tensor& y, vector features, string className) -{ - int max; - map> states; - for (int i = 0; i < X.size(1); i++) { - max = X.select(1, i).max().item() + 1; - states[features[i]] = vector(max); - } - max = y.max().item() + 1; - states[className] = vector(max); - return states; + return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; } tuple>, vector, vector, string, map>> loadFile(string name) diff --git a/src/Platform/platformUtils.h b/src/Platform/platformUtils.h index 9eefd66..c17c855 100644 --- a/src/Platform/platformUtils.h +++ b/src/Platform/platformUtils.h @@ -12,8 +12,8 @@ const string PATH = "../../data/"; bool file_exists(const std::string& name); pair, map> discretize(vector& X, mdlp::labels_t& y, vector features); -pair> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector features); +pair>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector& features, string className); tuple>, vector, vector, string, map>> loadFile(string name); -tuple, string> loadDataset(string name, bool discretize, bool class_last); -map> get_states(torch::Tensor& X, torch::Tensor& y, vector features, string className); +tuple, string, map>> loadDataset(string path, string name, bool class_last, bool discretize_dataset); +map> get_states(vector& features, string className, map& maxes); #endif //PLATFORM_UTILS_H