diff --git a/CHANGELOG.md b/CHANGELOG.md index 47d09a5..fe8c2cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_. - Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true. +- Add a sample app that shows how to use the library (make sample) + +### Changed + +- Change the library structure adding folders for each group of classes (classifiers, ensembles, etc). ## [1.0.3] diff --git a/CMakeLists.txt b/CMakeLists.txt index acd389f..69156f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,10 +65,9 @@ add_git_submodule("lib/json") # -------------- add_subdirectory(config) add_subdirectory(lib/Files) +add_subdirectory(sample) add_subdirectory(src) -file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/*.cc) - # Testing # ------- if (ENABLE_TESTING) diff --git a/Makefile b/Makefile index 2cda612..a7b6725 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ SHELL := /bin/bash .DEFAULT_GOAL := help -.PHONY: coverage setup help buildr buildd test clean debug release +.PHONY: coverage setup help buildr buildd test clean debug release sample f_release = build_release f_debug = build_debug app_targets = BayesNet -test_targets = unit_tests_bayesnet +test_targets = unit_tests_bayesnet n_procs = -j 16 define ClearTests @@ -59,6 +59,12 @@ release: ## Build a Release version of the project @if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi @mkdir $(f_release); @cmake -S . 
-B $(f_release) -D CMAKE_BUILD_TYPE=Release + @echo ">>> Done"; + +sample: ## Build sample + @echo ">>> Building Sample..."; + cmake --build $(f_release) -t bayesnet_sample $(n_procs) + $(f_release)/sample/bayesnet_sample tests/data/iris.arff @echo ">>> Done"; opt = "" diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt new file mode 100644 index 0000000..b56a20c --- /dev/null +++ b/sample/CMakeLists.txt @@ -0,0 +1,14 @@ +include_directories( + ${BayesNet_SOURCE_DIR}/src + ${BayesNet_SOURCE_DIR}/src/classifiers + ${BayesNet_SOURCE_DIR}/src/ensembles + ${BayesNet_SOURCE_DIR}/src/bayesian_network + ${BayesNet_SOURCE_DIR}/src/utils + ${BayesNet_SOURCE_DIR}/src/feature_selection + ${BayesNet_SOURCE_DIR}/lib/Files + ${BayesNet_SOURCE_DIR}/lib/mdlp + ${BayesNet_SOURCE_DIR}/lib/json/include + ${CMAKE_BINARY_DIR}/configured_files/include +) +add_executable(bayesnet_sample sample.cc) +target_link_libraries(bayesnet_sample ArffFiles BayesNet) \ No newline at end of file
diff --git a/sample/sample.cc b/sample/sample.cc
new file mode 100644
index 0000000..54b8639
--- /dev/null
+++ b/sample/sample.cc
@@ -0,0 +1,92 @@
+#include <algorithm>
+#include <exception>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+#include "ArffFiles.h"
+#include "CPPFImdlp.h"
+#include "BoostAODE.h"
+
+// Discretize each feature of X with the MDLP discretizer, fitted against the labels y.
+// X holds one vector of samples per feature; the result keeps that layout.
+std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
+{
+    std::vector<mdlp::labels_t> Xd;
+    Xd.reserve(X.size());
+    auto fimdlp = mdlp::CPPFImdlp();
+    for (auto& feature : X) {
+        fimdlp.fit(feature, y);
+        // transform() returns a reference; push_back stores a copy of it
+        Xd.push_back(fimdlp.transform(feature));
+    }
+    return Xd;
+}
+
+// Load the ARFF file `name` (class_last is forwarded to ArffFiles::load) and return, in order:
+// the discretized features as an int32 tensor with one row per feature, the labels as an int32
+// tensor, the feature names, the class name, and the states 0..max of every feature and of the class.
+// Throws std::invalid_argument if the dataset has no features or no samples.
+std::tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, std::map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
+{
+    auto handler = ArffFiles();
+    handler.load(name, class_last);
+    // Get Dataset X, y
+    std::vector<mdlp::samples_t>& X = handler.getX();
+    mdlp::labels_t& y = handler.getY();
+    // Get className & Features
+    auto className = handler.getClassName();
+    std::vector<std::string> features;
+    auto attributes = handler.getAttributes();
+    std::transform(attributes.begin(), attributes.end(), std::back_inserter(features), [](const auto& pair) { return pair.first; });
+    auto Xr = discretizeDataset(X, y);
+    // Xr[0] and the max_element() calls below are only valid on non-empty data
+    if (Xr.empty() || Xr[0].empty() || y.empty()) {
+        throw std::invalid_argument("Dataset " + name + " has no features or no samples");
+    }
+    auto states = std::map<std::string, std::vector<int>>();
+    torch::Tensor Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
+    for (int i = 0; i < static_cast<int>(features.size()); ++i) {
+        states[features[i]] = std::vector<int>(*std::max_element(Xr[i].begin(), Xr[i].end()) + 1);
+        // Take a reference: iota must fill the vector stored in the map, not a copy of it
+        auto& item = states.at(features[i]);
+        std::iota(item.begin(), item.end(), 0);
+        Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
+    }
+    states[className] = std::vector<int>(*std::max_element(y.begin(), y.end()) + 1);
+    std::iota(states.at(className).begin(), states.at(className).end(), 0);
+    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
+}
+
+// Fit a BoostAODE classifier on the ARFF file given as first argument and print its score
+// on that same data. Returns 1 on a missing argument or on any exception, 0 otherwise.
+int main(int argc, char* argv[])
+{
+    if (argc < 2) {
+        std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
+        return 1;
+    }
+    std::string file_name = argv[1];
+    torch::Tensor X, y;
+    std::vector<std::string> features;
+    std::string className;
+    std::map<std::string, std::vector<int>> states;
+    auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
+    std::cout << "Library version: " << clf.getVersion() << std::endl;
+    try {
+        std::tie(X, y, features, className, states) = loadDataset(file_name, true);
+        clf.fit(X, y, features, className, states);
+        auto score = clf.score(X, y);
+        std::cout << "File: " << file_name << " score: " << score << std::endl;
+    }
+    catch (const std::exception& e) {
+        std::cerr << "Error: " << e.what() << std::endl;
+        return 1;
+    }
+    return 0;
+}
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 461c6a9..e798319 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,10 +4,15 @@ include_directories( ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR}/src + ${BayesNet_SOURCE_DIR}/src/feature_selection + ${BayesNet_SOURCE_DIR}/src/bayesian_network + ${BayesNet_SOURCE_DIR}/src/classifiers + ${BayesNet_SOURCE_DIR}/src/ensembles + ${BayesNet_SOURCE_DIR}/src/utils ${CMAKE_BINARY_DIR}/configured_files/include ) -add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc - KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc 
BoostAODE.cc - Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ) +file(GLOB_RECURSE Sources "*.cc") + +add_library(BayesNet ${Sources}) target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Network.cc b/src/bayesian_network/Network.cc similarity index 100% rename from src/Network.cc rename to src/bayesian_network/Network.cc diff --git a/src/Network.h b/src/bayesian_network/Network.h similarity index 100% rename from src/Network.h rename to src/bayesian_network/Network.h diff --git a/src/Node.cc b/src/bayesian_network/Node.cc similarity index 100% rename from src/Node.cc rename to src/bayesian_network/Node.cc diff --git a/src/Node.h b/src/bayesian_network/Node.h similarity index 100% rename from src/Node.h rename to src/bayesian_network/Node.h diff --git a/src/Classifier.cc b/src/classifiers/Classifier.cc similarity index 100% rename from src/Classifier.cc rename to src/classifiers/Classifier.cc diff --git a/src/Classifier.h b/src/classifiers/Classifier.h similarity index 100% rename from src/Classifier.h rename to src/classifiers/Classifier.h diff --git a/src/KDB.cc b/src/classifiers/KDB.cc similarity index 100% rename from src/KDB.cc rename to src/classifiers/KDB.cc diff --git a/src/KDB.h b/src/classifiers/KDB.h similarity index 100% rename from src/KDB.h rename to src/classifiers/KDB.h diff --git a/src/KDBLd.cc b/src/classifiers/KDBLd.cc similarity index 100% rename from src/KDBLd.cc rename to src/classifiers/KDBLd.cc diff --git a/src/KDBLd.h b/src/classifiers/KDBLd.h similarity index 100% rename from src/KDBLd.h rename to src/classifiers/KDBLd.h diff --git a/src/Proposal.cc b/src/classifiers/Proposal.cc similarity index 100% rename from src/Proposal.cc rename to src/classifiers/Proposal.cc diff --git a/src/Proposal.h b/src/classifiers/Proposal.h similarity index 100% rename from src/Proposal.h rename to src/classifiers/Proposal.h diff --git a/src/SPODE.cc b/src/classifiers/SPODE.cc similarity index 100% 
rename from src/SPODE.cc rename to src/classifiers/SPODE.cc diff --git a/src/SPODE.h b/src/classifiers/SPODE.h similarity index 100% rename from src/SPODE.h rename to src/classifiers/SPODE.h diff --git a/src/SPODELd.cc b/src/classifiers/SPODELd.cc similarity index 100% rename from src/SPODELd.cc rename to src/classifiers/SPODELd.cc diff --git a/src/SPODELd.h b/src/classifiers/SPODELd.h similarity index 100% rename from src/SPODELd.h rename to src/classifiers/SPODELd.h diff --git a/src/TAN.cc b/src/classifiers/TAN.cc similarity index 100% rename from src/TAN.cc rename to src/classifiers/TAN.cc diff --git a/src/TAN.h b/src/classifiers/TAN.h similarity index 100% rename from src/TAN.h rename to src/classifiers/TAN.h diff --git a/src/TANLd.cc b/src/classifiers/TANLd.cc similarity index 100% rename from src/TANLd.cc rename to src/classifiers/TANLd.cc diff --git a/src/TANLd.h b/src/classifiers/TANLd.h similarity index 100% rename from src/TANLd.h rename to src/classifiers/TANLd.h diff --git a/src/AODE.cc b/src/ensembles/AODE.cc similarity index 100% rename from src/AODE.cc rename to src/ensembles/AODE.cc diff --git a/src/AODE.h b/src/ensembles/AODE.h similarity index 100% rename from src/AODE.h rename to src/ensembles/AODE.h diff --git a/src/AODELd.cc b/src/ensembles/AODELd.cc similarity index 100% rename from src/AODELd.cc rename to src/ensembles/AODELd.cc diff --git a/src/AODELd.h b/src/ensembles/AODELd.h similarity index 100% rename from src/AODELd.h rename to src/ensembles/AODELd.h diff --git a/src/BoostAODE.cc b/src/ensembles/BoostAODE.cc similarity index 100% rename from src/BoostAODE.cc rename to src/ensembles/BoostAODE.cc diff --git a/src/BoostAODE.h b/src/ensembles/BoostAODE.h similarity index 100% rename from src/BoostAODE.h rename to src/ensembles/BoostAODE.h diff --git a/src/Ensemble.cc b/src/ensembles/Ensemble.cc similarity index 100% rename from src/Ensemble.cc rename to src/ensembles/Ensemble.cc diff --git a/src/Ensemble.h b/src/ensembles/Ensemble.h 
similarity index 100% rename from src/Ensemble.h rename to src/ensembles/Ensemble.h diff --git a/src/CFS.cc b/src/feature_selection/CFS.cc similarity index 100% rename from src/CFS.cc rename to src/feature_selection/CFS.cc diff --git a/src/CFS.h b/src/feature_selection/CFS.h similarity index 100% rename from src/CFS.h rename to src/feature_selection/CFS.h diff --git a/src/FCBF.cc b/src/feature_selection/FCBF.cc similarity index 100% rename from src/FCBF.cc rename to src/feature_selection/FCBF.cc diff --git a/src/FCBF.h b/src/feature_selection/FCBF.h similarity index 100% rename from src/FCBF.h rename to src/feature_selection/FCBF.h diff --git a/src/FeatureSelect.cc b/src/feature_selection/FeatureSelect.cc similarity index 100% rename from src/FeatureSelect.cc rename to src/feature_selection/FeatureSelect.cc diff --git a/src/FeatureSelect.h b/src/feature_selection/FeatureSelect.h similarity index 100% rename from src/FeatureSelect.h rename to src/feature_selection/FeatureSelect.h diff --git a/src/IWSS.cc b/src/feature_selection/IWSS.cc similarity index 100% rename from src/IWSS.cc rename to src/feature_selection/IWSS.cc diff --git a/src/IWSS.h b/src/feature_selection/IWSS.h similarity index 100% rename from src/IWSS.h rename to src/feature_selection/IWSS.h diff --git a/src/BayesMetrics.cc b/src/utils/BayesMetrics.cc similarity index 100% rename from src/BayesMetrics.cc rename to src/utils/BayesMetrics.cc diff --git a/src/BayesMetrics.h b/src/utils/BayesMetrics.h similarity index 100% rename from src/BayesMetrics.h rename to src/utils/BayesMetrics.h diff --git a/src/Mst.cc b/src/utils/Mst.cc similarity index 100% rename from src/Mst.cc rename to src/utils/Mst.cc diff --git a/src/Mst.h b/src/utils/Mst.h similarity index 100% rename from src/Mst.h rename to src/utils/Mst.h diff --git a/src/bayesnetUtils.cc b/src/utils/bayesnetUtils.cc similarity index 100% rename from src/bayesnetUtils.cc rename to src/utils/bayesnetUtils.cc diff --git a/src/bayesnetUtils.h 
b/src/utils/bayesnetUtils.h similarity index 100% rename from src/bayesnetUtils.h rename to src/utils/bayesnetUtils.h diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index efccc48..630beab 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,13 +2,18 @@ if(ENABLE_TESTING) set(TEST_BAYESNET "unit_tests_bayesnet") include_directories( ${BayesNet_SOURCE_DIR}/src - ${BayesNet_SOURCE_DIR}/src/Platform + ${BayesNet_SOURCE_DIR}/src/feature_selection + ${BayesNet_SOURCE_DIR}/src/bayesian_network + ${BayesNet_SOURCE_DIR}/src/classifiers + ${BayesNet_SOURCE_DIR}/src/utils + ${BayesNet_SOURCE_DIR}/src/ensembles ${BayesNet_SOURCE_DIR}/lib/Files ${BayesNet_SOURCE_DIR}/lib/mdlp ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/json/include ${CMAKE_BINARY_DIR}/configured_files/include ) + file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/src/*.cc") set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES}) add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET}) target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )