diff --git a/CMakeLists.txt b/CMakeLists.txt index 137e21e..89e564e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,9 +37,14 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_P # Subdirectories # -------------- add_subdirectory(config) -add_subdirectory(src) +file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/src/BayesNet/*.hpp) +file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp) +file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp) +add_subdirectory(${BayesNet_SOURCE_DIR}/src/BayesNet) +add_subdirectory(${BayesNet_SOURCE_DIR}/src/Platform) add_subdirectory(sample) + # Testing # ------- if (ENABLE_TESTING) diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 28f910a..62d677d 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -1,3 +1,4 @@ -include_directories(${BayesNet_SOURCE_DIR}/src) -add_executable(main main.cc ArffFiles.cc CPPFImdlp.cpp Metrics.cpp) -target_link_libraries(main BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file +include_directories(${BayesNet_SOURCE_DIR}/src/Platform) +include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) +add_executable(sample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/ArffFiles.cc ${BayesNet_SOURCE_DIR}/src/Platform/CPPFImdlp.cpp ${BayesNet_SOURCE_DIR}/src/Platform/Metrics.cpp ${BayesNet_SOURCE_DIR}/src/Platform/typesFImdlp.h ${BayesNet_HEADERS}) +target_link_libraries(sample BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/sample/main.cc b/sample/sample.cc similarity index 100% rename from sample/main.cc rename to sample/sample.cc diff --git a/src/AODE.cc b/src/BayesNet/AODE.cc similarity index 100% rename from src/AODE.cc rename to src/BayesNet/AODE.cc diff --git a/src/AODE.h b/src/BayesNet/AODE.h similarity index 100% 
rename from src/AODE.h rename to src/BayesNet/AODE.h diff --git a/src/BaseClassifier.cc b/src/BayesNet/BaseClassifier.cc similarity index 99% rename from src/BaseClassifier.cc rename to src/BayesNet/BaseClassifier.cc index 21b69a1..0f0adba 100644 --- a/src/BaseClassifier.cc +++ b/src/BayesNet/BaseClassifier.cc @@ -1,5 +1,5 @@ #include "BaseClassifier.h" -#include "utils.h" +#include "bayesnetUtils.h" namespace bayesnet { using namespace std; diff --git a/src/BaseClassifier.h b/src/BayesNet/BaseClassifier.h similarity index 100% rename from src/BaseClassifier.h rename to src/BayesNet/BaseClassifier.h diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt new file mode 100644 index 0000000..5473662 --- /dev/null +++ b/src/BayesNet/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc Mst.cc) +target_link_libraries(BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Ensemble.cc b/src/BayesNet/Ensemble.cc similarity index 100% rename from src/Ensemble.cc rename to src/BayesNet/Ensemble.cc diff --git a/src/Ensemble.h b/src/BayesNet/Ensemble.h similarity index 97% rename from src/Ensemble.h rename to src/BayesNet/Ensemble.h index 8db299d..3787cca 100644 --- a/src/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -3,7 +3,7 @@ #include #include "BaseClassifier.h" #include "Metrics.hpp" -#include "utils.h" +#include "bayesnetUtils.h" using namespace std; using namespace torch; diff --git a/src/KDB.cc b/src/BayesNet/KDB.cc similarity index 100% rename from src/KDB.cc rename to src/BayesNet/KDB.cc diff --git a/src/KDB.h b/src/BayesNet/KDB.h similarity index 94% rename from src/KDB.h rename to src/BayesNet/KDB.h index f58a7a5..6d0fa7a 100644 --- a/src/KDB.h +++ b/src/BayesNet/KDB.h @@ -1,7 +1,7 @@ #ifndef KDB_H #define KDB_H #include "BaseClassifier.h" -#include "utils.h" +#include "bayesnetUtils.h" namespace bayesnet { using namespace 
std; using namespace torch; diff --git a/src/Metrics.cc b/src/BayesNet/Metrics.cc similarity index 100% rename from src/Metrics.cc rename to src/BayesNet/Metrics.cc diff --git a/src/Metrics.hpp b/src/BayesNet/Metrics.hpp similarity index 100% rename from src/Metrics.hpp rename to src/BayesNet/Metrics.hpp diff --git a/src/Mst.cc b/src/BayesNet/Mst.cc similarity index 100% rename from src/Mst.cc rename to src/BayesNet/Mst.cc diff --git a/src/Mst.h b/src/BayesNet/Mst.h similarity index 100% rename from src/Mst.h rename to src/BayesNet/Mst.h diff --git a/src/Network.cc b/src/BayesNet/Network.cc similarity index 100% rename from src/Network.cc rename to src/BayesNet/Network.cc diff --git a/src/Network.h b/src/BayesNet/Network.h similarity index 100% rename from src/Network.h rename to src/BayesNet/Network.h diff --git a/src/Node.cc b/src/BayesNet/Node.cc similarity index 100% rename from src/Node.cc rename to src/BayesNet/Node.cc diff --git a/src/Node.h b/src/BayesNet/Node.h similarity index 100% rename from src/Node.h rename to src/BayesNet/Node.h diff --git a/src/SPODE.cc b/src/BayesNet/SPODE.cc similarity index 100% rename from src/SPODE.cc rename to src/BayesNet/SPODE.cc diff --git a/src/SPODE.h b/src/BayesNet/SPODE.h similarity index 100% rename from src/SPODE.h rename to src/BayesNet/SPODE.h diff --git a/src/TAN.cc b/src/BayesNet/TAN.cc similarity index 100% rename from src/TAN.cc rename to src/BayesNet/TAN.cc diff --git a/src/TAN.h b/src/BayesNet/TAN.h similarity index 100% rename from src/TAN.h rename to src/BayesNet/TAN.h diff --git a/src/utils.cc b/src/BayesNet/bayesnetUtils.cc similarity index 95% rename from src/utils.cc rename to src/BayesNet/bayesnetUtils.cc index fc69fb9..2e1176d 100644 --- a/src/utils.cc +++ b/src/BayesNet/bayesnetUtils.cc @@ -1,5 +1,5 @@ -#include -#include + +#include "bayesnetUtils.h" namespace bayesnet { using namespace std; using namespace torch; diff --git a/src/utils.h b/src/BayesNet/bayesnetUtils.h similarity index 60% rename 
from src/utils.h rename to src/BayesNet/bayesnetUtils.h index 322397e..bb03ca7 100644 --- a/src/utils.h +++ b/src/BayesNet/bayesnetUtils.h @@ -1,8 +1,11 @@ +#ifndef BAYESNET_UTILS_H +#define BAYESNET_UTILS_H +#include +#include namespace bayesnet { using namespace std; using namespace torch; vector argsort(vector& nums); - vector> tensorToVector(const Tensor& tensor); - -} \ No newline at end of file +} +#endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index 7845477..0000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_library(BayesNet utils.cc Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc Mst.cc) -target_link_libraries(BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/sample/ArffFiles.cc b/src/Platform/ArffFiles.cc similarity index 100% rename from sample/ArffFiles.cc rename to src/Platform/ArffFiles.cc diff --git a/sample/ArffFiles.h b/src/Platform/ArffFiles.h similarity index 100% rename from sample/ArffFiles.h rename to src/Platform/ArffFiles.h diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt new file mode 100644 index 0000000..547766b --- /dev/null +++ b/src/Platform/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) +include_directories(${BayesNet_SOURCE_DIR}/src/Platform) +add_executable(main Experiment.cc ArffFiles.cc CPPFImdlp.cpp Metrics.cpp platformUtils.cc) +target_link_libraries(main BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/sample/CPPFImdlp.cpp b/src/Platform/CPPFImdlp.cpp similarity index 100% rename from sample/CPPFImdlp.cpp rename to src/Platform/CPPFImdlp.cpp diff --git a/sample/CPPFImdlp.h b/src/Platform/CPPFImdlp.h similarity index 100% rename from sample/CPPFImdlp.h rename to src/Platform/CPPFImdlp.h diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc new file mode 100644 index 
0000000..03dd8d4 --- /dev/null +++ b/src/Platform/Experiment.cc @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include +#include "ArffFiles.h" +#include "Network.h" +#include "Metrics.hpp" +#include "CPPFImdlp.h" +#include "KDB.h" +#include "SPODE.h" +#include "AODE.h" +#include "TAN.h" +#include "platformUtils.h" + + +using namespace std; + +/* print a description of all supported options */ +void usage(const char* path) +{ + /* take only the last portion of the path */ + const char* basename = strrchr(path, '/'); + basename = basename ? basename + 1 : path; + + cout << "usage: " << basename << " [OPTION]" << endl; + cout << " -h, --help\t\t Print this help and exit." << endl; + cout + << " -f, --file[=FILENAME]\t {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}." + << endl; + cout << " -p, --path[=FILENAME]\t folder where the data files are located, default " << PATH << endl; + cout << " -m, --model={AODE, KDB, SPODE, TAN}\t " << endl; +} + +tuple parse_arguments(int argc, char** argv) +{ + string file_name; + string model_name; + string path = PATH; + const vector long_options = { + {"help", no_argument, nullptr, 'h'}, + {"file", required_argument, nullptr, 'f'}, + {"path", required_argument, nullptr, 'p'}, + {"model", required_argument, nullptr, 'm'}, + {nullptr, no_argument, nullptr, 0} + }; + while (true) { + const auto c = getopt_long(argc, argv, "hf:p:m:", long_options.data(), nullptr); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv[0]); + exit(0); + case 'f': + file_name = string(optarg); + break; + case 'm': + model_name = string(optarg); + break; + case 'p': + path = optarg; + if (!path.empty() && path.back() != '/') + path += '/'; + break; + case '?': + usage(argv[0]); + exit(1); + default: + abort(); + } + } + if (file_name.empty()) { + usage(argv[0]); + exit(1); + } + return make_tuple(file_name, path, model_name); +} + +inline constexpr auto hash_conv(const std::string_view sv) +{ + unsigned long hash{ 
5381 }; + for (unsigned char c : sv) { + hash = ((hash << 5) + hash) ^ c; + } + return hash; +} + +inline constexpr auto operator"" _sh(const char* str, size_t len) +{ + return hash_conv(std::string_view{ str, len }); +} + + + +tuple get_options(int argc, char** argv) +{ + map datasets = { + {"diabetes", true}, + {"ecoli", true}, + {"glass", true}, + {"iris", true}, + {"kdd_JapaneseVowels", false}, + {"letter", true}, + {"liver-disorders", true}, + {"mfeat-factors", true}, + }; + vector models = { "AODE", "KDB", "SPODE", "TAN" }; + string file_name; + string path; + string model_name; + tie(file_name, path, model_name) = parse_arguments(argc, argv); + if (datasets.find(file_name) == datasets.end()) { + cout << "Invalid file name: " << file_name << endl; + usage(argv[0]); + exit(1); + } + if (!file_exists(path + file_name + ".arff")) { + cout << "Data File " << path + file_name + ".arff" << " does not exist" << endl; + usage(argv[0]); + exit(1); + } + if (find(models.begin(), models.end(), model_name) == models.end()) { + cout << "Invalid model name: " << model_name << endl; + usage(argv[0]); + exit(1); + } + return { file_name, path, model_name }; +} + +int main(int argc, char** argv) +{ + string file_name, path, model_name; + tie(file_name, path, model_name) = get_options(argc, argv); + auto handler = ArffFiles(); + handler.load(path + file_name + ".arff"); + // Get Dataset X, y + vector& X = handler.getX(); + mdlp::labels_t& y = handler.getY(); + // Get className & Features + auto className = handler.getClassName(); + vector features; + for (auto feature : handler.getAttributes()) { + features.push_back(feature.first); + } + // Discretize Dataset + vector Xd; + map maxes; + tie(Xd, maxes) = discretize(X, y, features); + maxes[className] = *max_element(y.begin(), y.end()) + 1; + map> states; + for (auto feature : features) { + states[feature] = vector(maxes[feature]); + } + states[className] = vector( + maxes[className]); + double score; + vector lines; + vector 
graph; + auto kdb = bayesnet::KDB(2); + auto aode = bayesnet::AODE(); + auto spode = bayesnet::SPODE(2); + auto tan = bayesnet::TAN(); + switch (hash_conv(model_name)) { + case "AODE"_sh: + aode.fit(Xd, y, features, className, states); + lines = aode.show(); + score = aode.score(Xd, y); + graph = aode.graph(); + break; + case "KDB"_sh: + kdb.fit(Xd, y, features, className, states); + lines = kdb.show(); + score = kdb.score(Xd, y); + graph = kdb.graph(); + break; + case "SPODE"_sh: + spode.fit(Xd, y, features, className, states); + lines = spode.show(); + score = spode.score(Xd, y); + graph = spode.graph(); + break; + case "TAN"_sh: + tan.fit(Xd, y, features, className, states); + lines = tan.show(); + score = tan.score(Xd, y); + graph = tan.graph(); + break; + } + for (auto line : lines) { + cout << line << endl; + } + cout << "Score: " << score << endl; + auto dot_file = model_name + "_" + file_name; + ofstream file(dot_file + ".dot"); + file << graph; + file.close(); + cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; + cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; + return 0; +} \ No newline at end of file diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h new file mode 100644 index 0000000..e69de29 diff --git a/sample/Metrics.cpp b/src/Platform/Metrics.cpp similarity index 100% rename from sample/Metrics.cpp rename to src/Platform/Metrics.cpp diff --git a/sample/Metrics.h b/src/Platform/Metrics.h similarity index 100% rename from sample/Metrics.h rename to src/Platform/Metrics.h diff --git a/tests/utils.cc b/src/Platform/platformUtils.cc similarity index 58% rename from tests/utils.cc rename to src/Platform/platformUtils.cc index 7e9cafb..555a285 100644 --- a/tests/utils.cc +++ b/src/Platform/platformUtils.cc @@ -1,40 +1,51 @@ -#include "utils.h" +#include "platformUtils.h" -pair, map> discretize(vector &X, mdlp::labels_t &y, vector features) { +pair, map> discretize(vector& X, 
mdlp::labels_t& y, vector features) +{ vector Xd; map maxes; auto fimdlp = mdlp::CPPFImdlp(); for (int i = 0; i < X.size(); i++) { fimdlp.fit(X[i], y); - mdlp::labels_t &xd = fimdlp.transform(X[i]); + mdlp::labels_t& xd = fimdlp.transform(X[i]); maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1; Xd.push_back(xd); } - return {Xd, maxes}; + return { Xd, maxes }; } -tuple>, vector, vector, string, map>> -loadFile(string name) { +bool file_exists(const std::string& name) +{ + if (FILE* file = fopen(name.c_str(), "r")) { + fclose(file); + return true; + } else { + return false; + } +} + +tuple>, vector, vector, string, map>> loadFile(string name) +{ auto handler = ArffFiles(); handler.load(PATH + static_cast(name) + ".arff"); // Get Dataset X, y - vector &X = handler.getX(); - mdlp::labels_t &y = handler.getY(); + vector& X = handler.getX(); + mdlp::labels_t& y = handler.getY(); // Get className & Features auto className = handler.getClassName(); vector features; - for (auto feature: handler.getAttributes()) { + for (auto feature : handler.getAttributes()) { features.push_back(feature.first); } // Discretize Dataset vector Xd; map maxes; tie(Xd, maxes) = discretize(X, y, features); - maxes[className] = *max_element(y.begin(), y. 
end()) + 1; + maxes[className] = *max_element(y.begin(), y.end()) + 1; map> states; - for (auto feature: features) { + for (auto feature : features) { states[feature] = vector(maxes[feature]); } states[className] = vector(maxes[className]); - return {Xd, y, features, className, states}; + return { Xd, y, features, className, states }; } \ No newline at end of file diff --git a/tests/utils.h b/src/Platform/platformUtils.h similarity index 56% rename from tests/utils.h rename to src/Platform/platformUtils.h index 5a826f4..78d90bc 100644 --- a/tests/utils.h +++ b/src/Platform/platformUtils.h @@ -1,13 +1,15 @@ +#ifndef PLATFORM_UTILS_H +#define PLATFORM_UTILS_H #include #include #include #include -#include "../sample/ArffFiles.h" -#include "../sample/CPPFImdlp.h" -#ifndef BAYESNET_UTILS_H -#define BAYESNET_UTILS_H +#include "ArffFiles.h" +#include "CPPFImdlp.h" using namespace std; const string PATH = "../../data/"; -pair, map> discretize(vector &X, mdlp::labels_t &y, vector features); + +bool file_exists(const std::string& name); +pair, map> discretize(vector& X, mdlp::labels_t& y, vector features); tuple>, vector, vector, string, map>> loadFile(string name); -#endif //BAYESNET_UTILS_H +#endif //PLATFORM_UTILS_H diff --git a/sample/typesFImdlp.h b/src/Platform/typesFImdlp.h similarity index 100% rename from sample/typesFImdlp.h rename to src/Platform/typesFImdlp.h diff --git a/tests/BayesModels.cc b/tests/BayesModels.cc index 1d20edb..fb33166 100644 --- a/tests/BayesModels.cc +++ b/tests/BayesModels.cc @@ -5,11 +5,11 @@ #include #include #include -#include "../src/KDB.h" -#include "../src/TAN.h" -#include "../src/SPODE.h" -#include "../src/AODE.h" -#include "utils.h" +#include "KDB.h" +#include "TAN.h" +#include "SPODE.h" +#include "AODE.h" +#include "platformUtils.h" TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") { diff --git a/tests/BayesNetwork.cc b/tests/BayesNetwork.cc index f9052b7..6750741 100644 --- a/tests/BayesNetwork.cc +++ 
b/tests/BayesNetwork.cc @@ -2,8 +2,8 @@ #include #include #include -#include "../src/KDB.h" -#include "utils.h" +#include "KDB.h" +#include "platformUtils.h" TEST_CASE("Test Bayesian Network") { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3278d83..9405b09 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,8 +1,8 @@ if(ENABLE_TESTING) set(TEST_MAIN "unit_tests") - set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ../sample/ArffFiles.cc ../sample/CPPFImdlp.cpp ../sample/Metrics.cpp - ../src/utils.cc ../src/Network.cc ../src/Node.cc ../src/Metrics.cc ../src/BaseClassifier.cc ../src/KDB.cc - ../src/TAN.cc ../src/SPODE.cc ../src/Ensemble.cc ../src/AODE.cc ../src/Mst.cc utils.cc utils.h) + include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) + include_directories(${BayesNet_SOURCE_DIR}/src/Platform) + set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCES} ${BayesNet_SOURCE_DIR}/src/Platform/ArffFiles.cc ${BayesNet_SOURCE_DIR}/src/Platform/CPPFImdlp.cpp ${BayesNet_SOURCE_DIR}/src/Platform/Metrics.cpp ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc) add_executable(${TEST_MAIN} ${TEST_SOURCES}) target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" Catch2::Catch2WithMain) add_test(NAME ${TEST_MAIN} COMMAND ${TEST_MAIN})