From 6f7fb290b0000a50c7232b68e229de2a5288f372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 26 Jul 2023 17:49:03 +0200 Subject: [PATCH] Add json lib and json result generation --- .gitmodules | 3 + .vscode/settings.json | 3 +- CMakeLists.txt | 3 +- data/_TAN_cpp_accuracy__.json | 1 + lib/json | 1 + src/Platform/CMakeLists.txt | 7 +- src/Platform/Experiment.cc | 255 +++++++++++++--------------------- src/Platform/Experiment.h | 90 ++++++++++++ src/Platform/Result.h | 62 --------- src/Platform/main.cc | 124 +++++++++++++++++ src/Platform/testx.cpp | 101 -------------- 11 files changed, 325 insertions(+), 325 deletions(-) create mode 100644 data/_TAN_cpp_accuracy__.json create mode 160000 lib/json delete mode 100644 src/Platform/Result.h create mode 100644 src/Platform/main.cc delete mode 100644 src/Platform/testx.cpp diff --git a/.gitmodules b/.gitmodules index 83c2fb7..2989f8a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "lib/argparse"] path = lib/argparse url = https://github.com/p-ranav/argparse +[submodule "lib/json"] + path = lib/json + url = https://github.com/nlohmann/json.git diff --git a/.vscode/settings.json b/.vscode/settings.json index 9159a42..09ba4f0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -100,7 +100,8 @@ "shared_mutex": "cpp", "*.ipp": "cpp", "cassert": "cpp", - "charconv": "cpp" + "charconv": "cpp", + "source_location": "cpp" }, "cmake.configureOnOpen": false, "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools" diff --git a/CMakeLists.txt b/CMakeLists.txt index c285764..8fae3e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") # Options # ------- -option(ENABLE_CLANG_TIDY "Enable to add clang tidy." ON) +option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_TESTING "Unit testing build" ON) option(CODE_COVERAGE "Collect coverage from test library" ON) @@ -43,6 +43,7 @@ include(CodeCoverage) add_git_submodule("lib/mdlp") add_git_submodule("lib/catch2") add_git_submodule("lib/argparse") +add_git_submodule("lib/json") # Subdirectories # -------------- diff --git a/data/_TAN_cpp_accuracy__.json b/data/_TAN_cpp_accuracy__.json new file mode 100644 index 0000000..ec747fa --- /dev/null +++ b/data/_TAN_cpp_accuracy__.json @@ -0,0 +1 @@ +null \ No newline at end of file diff --git a/lib/json b/lib/json new file mode 160000 index 0000000..5d27543 --- /dev/null +++ b/lib/json @@ -0,0 +1 @@ +Subproject commit 5d2754306d67d1e654a1a34e1d2e74439a9d53b3 diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 7a6dada..9d6293c 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -3,7 +3,6 @@ include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) -add_executable(main Experiment.cc Folding.cc platformUtils.cc) -add_executable(testx testx.cpp Folding.cc) -target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") -target_link_libraries(testx ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file +include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) +add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc) +target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES} ") \ No newline at end of file diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index 7f73dd5..aa1159d 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -1,164 +1,107 @@ -#include -#include -#include -#include -#include -#include "ArffFiles.h" -#include "Network.h" -#include "BayesMetrics.h" -#include "CPPFImdlp.h" -#include "KDB.h" -#include "SPODE.h" -#include "AODE.h" -#include "TAN.h" -#include "platformUtils.h" -#include "Result.h" -#include "Folding.h" +#include "Experiment.h" +namespace platform { + using json = nlohmann::json; + string get_date_time() + { + time_t rawtime; + tm* timeinfo; + time(&rawtime); + timeinfo = std::localtime(&rawtime); -using namespace std; - -Result cross_validation(Fold* fold, string model_name, Tensor& X, Tensor& y, vector features, string className, map> states) -{ - auto classifiers = map({ - { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, - { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } - } - ); - auto Xt = torch::transpose(X, 0, 1); - auto result = Result(); - auto k = fold->getNumberOfFolds(); - auto accuracy_test = torch::zeros({ k }, kFloat64); - auto accuracy_train = torch::zeros({ k }, kFloat64); - auto train_time = torch::zeros({ k }, kFloat64); - auto test_time = torch::zeros({ k }, kFloat64); - Timer train_timer, test_timer; - for (int i = 0; i < k; i++) { - bayesnet::BaseClassifier* model = classifiers[model_name]; - train_timer.start(); - auto [train, test] = fold->getFold(i); - auto train_t = torch::tensor(train); - auto test_t = torch::tensor(test); - auto X_train = Xt.index({ "...", train_t }); - auto y_train = y.index({ train_t }); - auto X_test = Xt.index({ "...", test_t }); - auto y_test = y.index({ test_t }); - model->fit(X_train, y_train, features, className, states); - cout << "Training Fold " << i + 1 << endl; - cout << "X_train: " << X_train.sizes() << endl; - cout << "y_train: " << y_train.sizes() << endl; - cout << "X_test: " << X_test.sizes() << endl; - cout << "y_test: " << y_test.sizes() << endl; - train_time[i] = train_timer.getDuration(); - auto accuracy_train_value = model->score(X_train, y_train); - test_timer.start(); - auto accuracy_test_value = model->score(X_test, y_test); - test_time[i] = test_timer.getDuration(); - accuracy_train[i] = accuracy_train_value; - accuracy_test[i] = accuracy_test_value; + std::ostringstream oss; + oss << std::put_time(timeinfo, "%Y-%m-%d_%H:%M:%S"); + return oss.str(); } - result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); - result.setScoreTestStd(torch::std(accuracy_test).item()).setScoreTrainStd(torch::std(accuracy_train).item()); - result.setTrainTime(torch::mean(train_time).item()).setTestTime(torch::mean(test_time).item()); - return result; -} - -int main(int argc, char** argv) -{ - map datasets = { - {"diabetes", true}, - {"ecoli", true}, - {"glass", true}, - {"iris", true}, - {"kdd_JapaneseVowels", false}, - {"letter", true}, - {"liver-disorders", true}, - {"mfeat-factors", true}, - }; - auto valid_datasets = vector(); - for (auto dataset : datasets) { - valid_datasets.push_back(dataset.first); + string Experiment::get_file_name() + { + string date_time = get_date_time(); + string result = "results_" + score_name + "_" + model + "_" + platform + "_" + date_time + "_" + (stratified ? "1" : "0") + ".json"; + return result; } - argparse::ArgumentParser program("BayesNetSample"); - program.add_argument("-d", "--dataset") - .help("Dataset file name") - .action([valid_datasets](const std::string& value) { - if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { - return value; + json Experiment::build_json() + { + json result; + result["title"] = title; + result["model"] = model; + result["platform"] = platform; + result["score_name"] = score_name; + result["model_version"] = model_version; + result["language_version"] = language_version; + result["discretized"] = discretized; + result["stratified"] = stratified; + result["nfolds"] = nfolds; + result["random_seeds"] = random_seeds; + result["duration"] = duration; + result["results"] = json::array(); + for (auto& r : results) { + json j; + j["dataset"] = r.getDataset(); + j["hyperparameters"] = r.getHyperparameters(); + j["samples"] = r.getSamples(); + j["features"] = r.getFeatures(); + j["classes"] = r.getClasses(); + j["score_train"] = r.getScoreTrain(); + j["score_test"] = r.getScoreTest(); + j["score_train_std"] = r.getScoreTrainStd(); + j["score_test_std"] = r.getScoreTestStd(); + j["train_time"] = r.getTrainTime(); + j["train_time_std"] = r.getTrainTimeStd(); + j["test_time"] = r.getTestTime(); + j["test_time_std"] = r.getTestTimeStd(); + result["results"].push_back(j); } - throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); + return result; + } + void Experiment::save(string path) + { + json data = build_json(); + ofstream file(path + get_file_name()); + file << data; + file.close(); + } + Result cross_validation(Fold* fold, string model_name, torch::Tensor& X, torch::Tensor& y, vector features, string className, map> states) + { + auto classifiers = map({ + { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, + { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } } - ); - program.add_argument("-p", "--path") - .help("folder where the data files are located, default") - .default_value(string{ PATH } - ); - program.add_argument("-m", "--model") - .help("Model to use {AODE, KDB, SPODE, TAN}") - .action([](const std::string& value) { - static const vector choices = { "AODE", "KDB", "SPODE", "TAN" }; - if (find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); - } - ); - program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); - program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw runtime_error("Number of folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw runtime_error(err.what()); - } - catch (...) { - throw runtime_error("Number of folds must be an integer"); - }}); - program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); - bool class_last, discretize_dataset, stratified; - int n_folds, seed; - string model_name, file_name, path, complete_file_name; - try { - program.parse_args(argc, argv); - file_name = program.get("dataset"); - path = program.get("path"); - model_name = program.get("model"); - discretize_dataset = program.get("discretize"); - stratified = program.get("stratified"); - n_folds = program.get("folds"); - seed = program.get("seed"); - complete_file_name = path + file_name + ".arff"; - class_last = datasets[file_name]; - if (!file_exists(complete_file_name)) { - throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); + ); + auto Xt = torch::transpose(X, 0, 1); + auto result = Result(); + auto k = fold->getNumberOfFolds(); + auto accuracy_test = torch::zeros({ k }, torch::kFloat64); + auto accuracy_train = torch::zeros({ k }, torch::kFloat64); + auto train_time = torch::zeros({ k }, torch::kFloat64); + auto test_time = torch::zeros({ k }, torch::kFloat64); + Timer train_timer, test_timer; + for (int i = 0; i < k; i++) { + bayesnet::BaseClassifier* model = classifiers[model_name]; + train_timer.start(); + auto [train, test] = fold->getFold(i); + auto train_t = torch::tensor(train); + auto test_t = torch::tensor(test); + auto X_train = Xt.index({ "...", train_t }); + auto y_train = y.index({ train_t }); + auto X_test = Xt.index({ "...", test_t }); + auto y_test = y.index({ test_t }); + model->fit(X_train, y_train, features, className, states); + cout << "Training Fold " << i + 1 << endl; + cout << "X_train: " << X_train.sizes() << endl; + cout << "y_train: " << y_train.sizes() << endl; + cout << "X_test: " << X_test.sizes() << endl; + cout << "y_test: " << y_test.sizes() << endl; + train_time[i] = train_timer.getDuration(); + auto accuracy_train_value = model->score(X_train, y_train); + test_timer.start(); + auto accuracy_test_value = model->score(X_test, y_test); + test_time[i] = test_timer.getDuration(); + accuracy_train[i] = accuracy_train_value; + accuracy_test[i] = accuracy_test_value; } + result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); + result.setScoreTestStd(torch::std(accuracy_test).item()).setScoreTrainStd(torch::std(accuracy_train).item()); + result.setTrainTime(torch::mean(train_time).item()).setTestTime(torch::mean(test_time).item()); + return result; } - catch (const exception& err) { - cerr << err.what() << endl; - cerr << program; - exit(1); - } - /* - * Begin Processing - */ - auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset); - Fold* fold; - if (stratified) - fold = new StratifiedKFold(n_folds, y, seed); - else - fold = new KFold(n_folds, y.numel(), seed); - auto experiment = Experiment(); - experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp"); - experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy"); - auto result = cross_validation(fold, model_name, X, y, features, className, states); - result.setDataset(file_name); - experiment.addResult(result); - experiment.save(path); - experiment.show(); - return 0; -} +} \ No newline at end of file diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index e69de29..ce68052 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -0,0 +1,90 @@ +#ifndef EXPERIMENT_H +#define EXPERIMENT_H +#include +#include +#include +#include +#include "Folding.h" +#include "BaseClassifier.h" +#include "TAN.h" +#include "KDB.h" +#include "AODE.h" + +using namespace std; +namespace platform { + using json = nlohmann::json; + class Timer { + private: + chrono::time_point begin; + public: + Timer() = default; + ~Timer() = default; + void start() { begin = chrono::high_resolution_clock::now(); } + float getDuration() { return chrono::duration_cast(chrono::high_resolution_clock::now() - begin).count(); } + }; + class Result { + private: + string dataset, hyperparameters; + int samples, features, classes; + float score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std; + public: + Result() = default; + Result& setDataset(string dataset) { this->dataset = dataset; return *this; } + Result& setHyperparameters(string hyperparameters) { this->hyperparameters = hyperparameters; return *this; } + Result& setSamples(int samples) { this->samples = samples; return *this; } + Result& setFeatures(int features) { this->features = features; return *this; } + Result& setClasses(int classes) { this->classes = classes; return *this; } + Result& setScoreTrain(float score) { this->score_train = score; return *this; } + Result& setScoreTest(float score) { this->score_test = score; return *this; } + Result& setScoreTrainStd(float score_std) { this->score_train_std = score_std; return *this; } + Result& setScoreTestStd(float score_std) { this->score_test_std = score_std; return *this; } + Result& setTrainTime(float train_time) { this->train_time = train_time; return *this; } + Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; } + Result& setTestTime(float test_time) { this->test_time = test_time; return *this; } + Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; } + const float get_score_train() const { return score_train; } + float get_score_test() { return score_test; } + const string& getDataset() const { return dataset; } + const string& getHyperparameters() const { return hyperparameters; } + const int getSamples() const { return samples; } + const int getFeatures() const { return features; } + const int getClasses() const { return classes; } + const float getScoreTrain() const { return score_train; } + const float getScoreTest() const { return score_test; } + const float getScoreTrainStd() const { return score_train_std; } + const float getScoreTestStd() const { return score_test_std; } + const float getTrainTime() const { return train_time; } + const float getTrainTimeStd() const { return train_time_std; } + const float getTestTime() const { return test_time; } + const float getTestTimeStd() const { return test_time_std; } + }; + class Experiment { + private: + string title, model, platform, score_name, model_version, language_version; + bool discretized, stratified; + vector results; + vector random_seeds; + int nfolds; + float duration; + json build_json(); + public: + Experiment() = default; + Experiment& setTitle(string title) { this->title = title; return *this; } + Experiment& setModel(string model) { this->model = model; return *this; } + Experiment& setPlatform(string platform) { this->platform = platform; return *this; } + Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; } + Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; } + Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; } + Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } + Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } + Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } + Experiment& addResult(Result result) { results.push_back(result); return *this; } + Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; } + Experiment& setDuration(float duration) { this->duration = duration; return *this; } + string get_file_name(); + void save(string path); + void show() { cout << "Showing experiment..." << "Score Test: " << results[0].get_score_test() << " Score Train: " << results[0].get_score_train() << endl; } + }; + Result cross_validation(Fold* fold, string model_name, torch::Tensor& X, torch::Tensor& y, vector features, string className, map> states); +} +#endif \ No newline at end of file diff --git a/src/Platform/Result.h b/src/Platform/Result.h deleted file mode 100644 index f347292..0000000 --- a/src/Platform/Result.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef RESULT_H -#define RESULT_H -#include -#include - -using namespace std; -class Timer { -private: - chrono::time_point begin; -public: - Timer() = default; - ~Timer() = default; - void start() { begin = chrono::high_resolution_clock::now(); } - float getDuration() { return chrono::duration_cast(chrono::high_resolution_clock::now() - begin).count(); } -}; -class Result { -private: - string dataset, hyperparameters; - int samples, features, classes; - float score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std; -public: - Result() = default; - Result& setDataset(string dataset) { this->dataset = dataset; return *this; } - Result& setHyperparameters(string hyperparameters) { this->hyperparameters = hyperparameters; return *this; } - Result& setSamples(int samples) { this->samples = samples; return *this; } - Result& setFeatures(int features) { this->features = features; return *this; } - Result& setClasses(int classes) { this->classes = classes; return *this; } - Result& setScoreTrain(float score) { this->score_train = score; return *this; } - Result& setScoreTest(float score) { this->score_test = score; return *this; } - Result& setScoreTrainStd(float score_std) { this->score_train_std = score_std; return *this; } - Result& setScoreTestStd(float score_std) { this->score_test_std = score_std; return *this; } - Result& setTrainTime(float train_time) { this->train_time = train_time; return *this; } - Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; } - Result& setTestTime(float test_time) { this->test_time = test_time; return *this; } - Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; } - float get_score_train() { return score_train; } - float get_score_test() { return score_test; } -}; -class Experiment { -private: - string title, model, platform, score_name, model_version, language_version; - bool discretized, stratified; - vector results; - vector random_seeds; - int nfolds; -public: - Experiment() = default; - Experiment& setTitle(string title) { this->title = title; return *this; } - Experiment& setModel(string model) { this->model = model; return *this; } - Experiment& setPlatform(string platform) { this->platform = platform; return *this; } - Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; } - Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; } - Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; } - Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } - Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } - Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } - Experiment& addResult(Result result) { results.push_back(result); return *this; } - Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; } - void save(string path) { cout << "Saving experiment..." << endl; } - void show() { cout << "Showing experiment..." << "Score Test: " << results[0].get_score_test() << " Score Train: " << results[0].get_score_train() << endl; } -}; -#endif \ No newline at end of file diff --git a/src/Platform/main.cc b/src/Platform/main.cc new file mode 100644 index 0000000..b2f78b7 --- /dev/null +++ b/src/Platform/main.cc @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include "ArffFiles.h" +#include "Network.h" +#include "BayesMetrics.h" +#include "CPPFImdlp.h" +#include "KDB.h" +#include "SPODE.h" +#include "AODE.h" +#include "TAN.h" +#include "platformUtils.h" +#include "Experiment.h" +#include "Folding.h" + + +using namespace std; + + + +int main(int argc, char** argv) +{ + map datasets = { + {"diabetes", true}, + {"ecoli", true}, + {"glass", true}, + {"iris", true}, + {"kdd_JapaneseVowels", false}, + {"letter", true}, + {"liver-disorders", true}, + {"mfeat-factors", true}, + }; + auto valid_datasets = vector(); + for (auto dataset : datasets) { + valid_datasets.push_back(dataset.first); + } + argparse::ArgumentParser program("BayesNetSample"); + program.add_argument("-d", "--dataset") + .help("Dataset file name") + .action([valid_datasets](const std::string& value) { + if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { + return value; + } + throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); + } + ); + program.add_argument("-p", "--path") + .help("folder where the data files are located, default") + .default_value(string{ PATH } + ); + program.add_argument("-m", "--model") + .help("Model to use {AODE, KDB, SPODE, TAN}") + .action([](const std::string& value) { + static const vector choices = { "AODE", "KDB", "SPODE", "TAN" }; + if (find(choices.begin(), choices.end(), value) != choices.end()) { + return value; + } + throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); + } + ); + program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); + program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); + program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { + try { + auto k = stoi(value); + if (k < 2) { + throw runtime_error("Number of folds must be greater than 1"); + } + return k; + } + catch (const runtime_error& err) { + throw runtime_error(err.what()); + } + catch (...) { + throw runtime_error("Number of folds must be an integer"); + }}); + program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); + bool class_last, discretize_dataset, stratified; + int n_folds, seed; + string model_name, file_name, path, complete_file_name; + try { + program.parse_args(argc, argv); + file_name = program.get("dataset"); + path = program.get("path"); + model_name = program.get("model"); + discretize_dataset = program.get("discretize"); + stratified = program.get("stratified"); + n_folds = program.get("folds"); + seed = program.get("seed"); + complete_file_name = path + file_name + ".arff"; + class_last = datasets[file_name]; + if (!file_exists(complete_file_name)) { + throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); + } + } + catch (const exception& err) { + cerr << err.what() << endl; + cerr << program; + exit(1); + } + /* + * Begin Processing + */ + auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset); + Fold* fold; + if (stratified) + fold = new StratifiedKFold(n_folds, y, seed); + else + fold = new KFold(n_folds, y.numel(), seed); + auto experiment = platform::Experiment(); + experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp"); + experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy"); + platform::Timer timer; + timer.start(); + auto result = platform::cross_validation(fold, model_name, X, y, features, className, states); + result.setDataset(file_name); + experiment.addResult(result); + experiment.setDuration(timer.getDuration()); + experiment.save(path); + experiment.show(); + return 0; +} diff --git a/src/Platform/testx.cpp b/src/Platform/testx.cpp deleted file mode 100644 index 09b083a..0000000 --- a/src/Platform/testx.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include "Folding.h" -#include -#include -using namespace std; -class A { -private: - int a; -public: - A(int a) : a(a) {} - int getA() { return a; } -}; -class B : public A { -private: - int b; -public: - B(int a, int b) : A(a), b(b) {} - int getB() { return b; } -}; -class C : public A { -private: - int c; -public: - C(int a, int c) : A(a), c(c) {} - int getC() { return c; } -}; - -string counts(vector y, vector indices) -{ - auto result = map(); - for (auto i = 0; i < indices.size(); ++i) { - result[y[indices[i]]]++; - } - string final_result = ""; - for (auto i = 0; i < result.size(); ++i) - final_result += to_string(i) + " -> " + to_string(result[i]) + " // "; - final_result += "\n"; - return final_result; -} - -int main() -{ - auto y = vector(153); - fill(y.begin(), y.begin() + 50, 0); - fill(y.begin() + 50, y.begin() + 103, 1); - fill(y.begin() + 103, y.end(), 2); - //auto fold = KFold(5, 150); - auto fold = StratifiedKFold(5, y, -1); - for (int i = 0; i < 5; ++i) { - cout << "Fold: " << i << endl; - auto [train, test] = fold.getFold(i); - cout << "Train: "; - cout << "(" << train.size() << "): "; - for (auto j = 0; j < static_cast(train.size()); j++) - cout << train[j] << ", "; - cout << endl; - cout << "Train Statistics : " << counts(y, train); - cout << "-------------------------------------------------------------------------------" << endl; - cout << "Test: "; - cout << "(" << test.size() << "): "; - for (auto j = 0; j < static_cast(test.size()); j++) - cout << test[j] << ", "; - cout << endl; - cout << "Test Statistics: " << counts(y, test); - cout << "==============================================================================" << endl; - torch::Tensor a = torch::zeros({ 5, 3 }); - torch::Tensor b = torch::zeros({ 5 }) + 1; - torch::Tensor c = torch::cat({ a, b.view({5, 1}) }, 1); - cout << "a:" << a.sizes() << endl; - cout << a << endl; - cout << "b:" << b.sizes() << endl; - cout << b << endl; - cout << "c:" << c.sizes() << endl; - cout << c << endl; - torch::Tensor d = torch::zeros({ 5, 3 }); - torch::Tensor e = torch::tensor({ 1,2,3,4,5 }) + 1; - torch::Tensor f = torch::cat({ d, e.view({5, 1}) }, 1); - cout << "d:" << d.sizes() << endl; - cout << d << endl; - cout << "e:" << e.sizes() << endl; - cout << e << endl; - cout << "f:" << f.sizes() << endl; - cout << f << endl; - auto indices = torch::tensor({ 0, 2, 4 }); - auto k = f.index({ indices, "..." }); - cout << "k:" << k.sizes() << endl; - cout << k << endl; - auto w = torch::index_select(f, 0, indices); - cout << "w:" << w.sizes() << endl; - cout << w << endl; - - // cout << "Vector poly" << endl; - // auto some = vector(); - // auto cx = C(5, 4); - // auto bx = B(7, 6); - // some.push_back(cx); - // some.push_back(bx); - // for (auto& obj : some) { - // cout << "Obj :" << obj.getA() << endl; - // } - } -}