diff --git a/.vscode/launch.json b/.vscode/launch.json index 14f9cc8..b5d7fc1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -31,6 +31,8 @@ "--stratified", "--title", "Debug test", + "--seeds", + "1", "-d", "ionosphere" ], diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 4f9d087..000a88b 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -3,5 +3,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) -add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc) +add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/sample/sample.cc b/sample/sample.cc index f515405..038e03f 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -4,16 +4,11 @@ #include #include #include -#include "BaseClassifier.h" #include "ArffFiles.h" -#include "Network.h" #include "BayesMetrics.h" #include "CPPFImdlp.h" -#include "KDB.h" -#include "SPODE.h" -#include "AODE.h" -#include "TAN.h" #include "Folding.h" +#include "Models.h" using namespace std; @@ -91,13 +86,13 @@ int main(int argc, char** argv) .default_value(string{ PATH } ); program.add_argument("-m", "--model") - .help("Model to use {AODE, KDB, SPODE, TAN}") + .help("Model to use " + platform::Models::toString()) .action([](const std::string& value) { - static const vector choices = { "AODE", "KDB", "SPODE", "TAN" }; + static const vector choices = platform::Models::getNames(); if (find(choices.begin(), choices.end(), value) != choices.end()) { return value; } - throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); + throw runtime_error("Model must be one of " + platform::Models::toString()); } ); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); @@ -164,12 +159,8 @@ int main(int argc, char** argv) states[feature] = vector(maxes[feature]); } states[className] = vector(maxes[className]); - auto classifiers = map({ - { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, - { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } - } - ); - bayesnet::BaseClassifier* clf = classifiers[model_name]; + + bayesnet::BaseClassifier* clf = platform::Models::get(model_name); clf->fit(Xd, y, features, className, states); auto score = clf->score(Xd, y); auto lines = clf->show(); diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 58b1f41..38d3540 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -4,5 +4,5 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) -add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc CrossValidation.cc) +add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc CrossValidation.cc Models.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/CrossValidation.cc b/src/Platform/CrossValidation.cc index 6ac0682..b27788b 100644 --- a/src/Platform/CrossValidation.cc +++ b/src/Platform/CrossValidation.cc @@ -1,8 +1,5 @@ #include "CrossValidation.h" -#include "AODE.h" -#include "TAN.h" -#include "KDB.h" -#include "SPODE.h" +#include "Models.h" namespace platform { using json = nlohmann::json; @@ -10,10 +7,6 @@ namespace platform { CrossValidation::CrossValidation(string modelName, bool stratified, int nfolds, vector randomSeeds, platform::Datasets& datasets) : modelName(modelName), stratified(stratified), nfolds(nfolds), randomSeeds(randomSeeds), datasets(datasets) { - classifiers = map({ - { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, - { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } - }); } Result CrossValidation::crossValidate(string fileName) @@ -45,7 +38,7 @@ namespace platform { fold = new KFold(nfolds, samples, seed); cout << "Fold: " << flush; for (int nfold = 0; nfold < nfolds; nfold++) { - bayesnet::BaseClassifier* model = classifiers[modelName]; + bayesnet::BaseClassifier* model = Models::get(modelName); result.setModelVersion(model->getVersion()); train_timer.start(); auto [train, test] = fold->getFold(nfold); @@ -67,6 +60,11 @@ namespace platform { test_time[item] = test_timer.getDuration(); accuracy_train[item] = accuracy_train_value; accuracy_test[item] = accuracy_test_value; + // Store results and times in vector + result.addScoreTrain(accuracy_train_value); + result.addScoreTest(accuracy_test_value); + result.addTimeTrain(train_time[item].item()); + result.addTimeTest(test_time[item].item()); item++; } delete fold; diff --git a/src/Platform/CrossValidation.h b/src/Platform/CrossValidation.h index ccbe7c3..ff084d7 100644 --- a/src/Platform/CrossValidation.h +++ b/src/Platform/CrossValidation.h @@ -5,7 +5,6 @@ #include #include #include "Folding.h" -#include "BaseClassifier.h" #include "Datasets.h" #include "Experiment.h" @@ -17,7 +16,6 @@ namespace platform { string modelName; vector randomSeeds; platform::Datasets& datasets; - map classifiers; public: CrossValidation(string modelName, bool stratified, int nfolds, vector randomSeeds, platform::Datasets& datasets); ~CrossValidation() = default; diff --git a/src/Platform/Datasets.h b/src/Platform/Datasets.h index f6a4c5b..1d178b1 100644 --- a/src/Platform/Datasets.h +++ b/src/Platform/Datasets.h @@ -60,6 +60,7 @@ namespace platform { pair getTensors(string name); bool isDataset(string name); }; + vector split(string, char); }; #endif \ No newline at end of file diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index f3b5a3f..b86fa03 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -65,6 +65,10 @@ namespace platform { j["test_time_std"] = r.getTestTimeStd(); j["time"] = r.getTestTime() + r.getTrainTime(); j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd(); + j["scores_train"] = r.getScoresTrain(); + j["scores_test"] = r.getScoresTest(); + j["times_train"] = r.getTimesTrain(); + j["times_test"] = r.getTimesTest(); j["nodes"] = r.getNodes(); j["leaves"] = r.getLeaves(); j["depth"] = r.getDepth(); diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index db479e8..a9cadfb 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -27,6 +27,7 @@ namespace platform { string dataset, hyperparameters, model_version; int samples, features, classes; double score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std; + vector scores_train, scores_test, times_train, times_test; float nodes, leaves, depth; public: Result() = default; @@ -47,6 +48,10 @@ namespace platform { Result& setLeaves(float leaves) { this->leaves = leaves; return *this; } Result& setDepth(float depth) { this->depth = depth; return *this; } Result& setModelVersion(string model_version) { this->model_version = model_version; return *this; } + Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; } + Result& addScoreTest(double score) { scores_test.push_back(score); return *this; } + Result& addTimeTrain(double time) { times_train.push_back(time); return *this; } + Result& addTimeTest(double time) { times_test.push_back(time); return *this; } const float get_score_train() const { return score_train; } float get_score_test() { return score_test; } const string& getDataset() const { return dataset; } @@ -65,6 +70,10 @@ namespace platform { const float getNodes() const { return nodes; } const float getLeaves() const { return leaves; } const float getDepth() const { return depth; } + const vector& getScoresTrain() const { return scores_train; } + const vector& getScoresTest() const { return scores_test; } + const vector& getTimesTrain() const { return times_train; } + const vector& getTimesTest() const { return times_test; } const string& getModelVersion() const { return model_version; } }; class Experiment { diff --git a/src/Platform/Models.cc b/src/Platform/Models.cc new file mode 100644 index 0000000..cde6f09 --- /dev/null +++ b/src/Platform/Models.cc @@ -0,0 +1,8 @@ +#include "Models.h" +namespace platform { + using namespace std; + map Models::classifiers = map({ + { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, + { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } + }); +} diff --git a/src/Platform/Models.h b/src/Platform/Models.h new file mode 100644 index 0000000..2851036 --- /dev/null +++ b/src/Platform/Models.h @@ -0,0 +1,33 @@ +#ifndef MODELS_H +#define MODELS_H +#include +#include "BaseClassifier.h" +#include "AODE.h" +#include "TAN.h" +#include "KDB.h" +#include "SPODE.h" +namespace platform { + class Models { + private: + static map classifiers; + public: + static bayesnet::BaseClassifier* get(string name) { return classifiers[name]; } + static vector getNames() + { + vector names; + for (auto& [name, classifier] : classifiers) { + names.push_back(name); + } + return names; + } + static string toString() + { + string names = ""; + for (auto& [name, classifier] : classifiers) { + names += name + ", "; + } + return "{" + names.substr(0, names.size() - 2) + "}"; + } + }; +} +#endif \ No newline at end of file diff --git a/src/Platform/main.cc b/src/Platform/main.cc index 4aaad09..25984d6 100644 --- a/src/Platform/main.cc +++ b/src/Platform/main.cc @@ -5,6 +5,7 @@ #include "Datasets.h" #include "DotEnv.h" #include "CrossValidation.h" +#include "Models.h" using namespace std; @@ -21,16 +22,16 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) .default_value(string{ PATH_DATASETS } ); program.add_argument("-m", "--model") - .help("Model to use {AODE, KDB, SPODE, TAN}") + .help("Model to use " + platform::Models::toString()) .action([](const std::string& value) { - static const vector choices = { "AODE", "KDB", "SPODE", "TAN" }; + static const vector choices = platform::Models::getNames(); if (find(choices.begin(), choices.end(), value) != choices.end()) { return value; } - throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); + throw runtime_error("Model must be one of " + platform::Models::toString()); } ); - program.add_argument("--title").required().help("Experiment title"); + program.add_argument("--title").default_value("").help("Experiment title"); program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) { @@ -47,9 +48,8 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) catch (...) { throw runtime_error("Number of folds must be an integer"); }}); - auto seed_values = env.getSeeds(); - program.add_argument("-s", "--seeds").help("Random seeds comma separated. Set to -1 to have pseudo random").default_value(seed_values); + program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); bool class_last, discretize_dataset, stratified; int n_folds; vector seeds; @@ -66,6 +66,9 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) complete_file_name = path + file_name + ".arff"; class_last = false;//datasets[file_name]; title = program.get("title"); + if (title == "" && file_name == "") { + throw runtime_error("title is mandatory if dataset is not provided"); + } } catch (const exception& err) { cerr << err.what() << endl; @@ -89,17 +92,20 @@ int main(int argc, char** argv) auto seeds = program.get>("seeds"); vector filesToProcess; auto datasets = platform::Datasets(path, true, platform::ARFF); + auto title = program.get("title"); if (file_name != "") { if (!datasets.isDataset(file_name)) { cerr << "Dataset " << file_name << " not found" << endl; exit(1); } + if (title == "") { + title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds"; + } filesToProcess.push_back(file_name); } else { filesToProcess = platform::Datasets(path, true, platform::ARFF).getNames(); saveResults = true; // Only save results if all datasets are processed } - auto title = program.get("title"); /* * Begin Processing