Compare commits

..

4 Commits

11 changed files with 33 additions and 51 deletions

View File

@@ -1,6 +1,6 @@
FROM mcr.microsoft.com/devcontainers/cpp:ubuntu22.04 FROM mcr.microsoft.com/devcontainers/cpp:ubuntu22.04
ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.22.2" ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.29.3"
# Optionally install the cmake for vcpkg # Optionally install the cmake for vcpkg
COPY ./reinstall-cmake.sh /tmp/ COPY ./reinstall-cmake.sh /tmp/
@@ -23,7 +23,7 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test
RUN apt-get update RUN apt-get update
# Install GCC 13.1 # Install GCC 13.1
RUN apt-get install -y gcc-13 g++-13 RUN apt-get install -y gcc-13 g++-13 doxygen
# Install lcov 2.1 # Install lcov 2.1
RUN wget --quiet https://github.com/linux-test-project/lcov/releases/download/v2.1/lcov-2.1.tar.gz && \ RUN wget --quiet https://github.com/linux-test-project/lcov/releases/download/v2.1/lcov-2.1.tar.gz && \

View File

@@ -51,15 +51,10 @@ endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
if (CODE_COVERAGE) if (CODE_COVERAGE)
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
message("ALL LANGUAGES: ${LANGUAGES}")
foreach(LANG ${LANGUAGES})
message("${LANG} compiler is \"${CMAKE_${LANG}_COMPILER_ID}\"")
endforeach()
enable_testing() enable_testing()
#include(CodeCoverage) include(CodeCoverage)
#MESSAGE("Code coverage enabled") MESSAGE("Code coverage enabled")
#SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage") SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE) endif (CODE_COVERAGE)
if (ENABLE_CLANG_TIDY) if (ENABLE_CLANG_TIDY)
@@ -98,10 +93,14 @@ install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DES
# Documentation # Documentation
# ------------- # -------------
find_package(Doxygen) find_package(Doxygen)
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs) if (Doxygen_FOUND)
set(doxyfile_in ${DOC_DIR}/Doxyfile.in) set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
set(doxyfile ${DOC_DIR}/Doxyfile) set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
configure_file(${doxyfile_in} ${doxyfile} @ONLY) set(doxyfile ${DOC_DIR}/Doxyfile)
doxygen_add_docs(doxygen configure_file(${doxyfile_in} ${doxyfile} @ONLY)
WORKING_DIRECTORY ${DOC_DIR} doxygen_add_docs(doxygen
WORKING_DIRECTORY ${DOC_DIR}
CONFIG_FILE ${doxyfile}) CONFIG_FILE ${doxyfile})
else (Doxygen_FOUND)
MESSAGE("* Doxygen not found")
endif (Doxygen_FOUND)

View File

@@ -58,10 +58,10 @@ diagrams: ## Create an UML class diagram & depnendency of the project (diagrams/
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg @$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
buildd: ## Build the debug targets buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) --parallel cmake --build $(f_debug) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
buildr: ## Build the release targets buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) --parallel cmake --build $(f_release) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
clean: ## Clean the tests info clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests..."; @echo ">>> Cleaning Debug BayesNet tests...";
@@ -105,7 +105,7 @@ opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet tests..."; @echo ">>> Running BayesNet tests...";
@$(MAKE) clean @$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) --parallel @cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
@for t in $(test_targets); do \ @for t in $(test_targets); do \
echo ">>> Running $$t...";\ echo ">>> Running $$t...";\
if [ -f $(f_debug)/tests/$$t ]; then \ if [ -f $(f_debug)/tests/$$t ]; then \

View File

@@ -9,15 +9,7 @@
#include "Classifier.h" #include "Classifier.h"
namespace bayesnet { namespace bayesnet {
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false), device(torch::kCPU) Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
{
if (torch::cuda::is_available()) {
device = torch::Device(torch::kCUDA);
std::cout << "CUDA is available! Using GPU." << std::endl;
} else {
std::cout << "CUDA is not available. Using CPU." << std::endl;
}
}
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
@@ -39,7 +31,7 @@ namespace bayesnet {
{ {
try { try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1); auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
dataset = torch::cat({ dataset, yresized }, 0).to(device); dataset = torch::cat({ dataset, yresized }, 0);
} }
catch (const std::exception& e) { catch (const std::exception& e) {
std::stringstream oss; std::stringstream oss;
@@ -58,7 +50,7 @@ namespace bayesnet {
{ {
dataset = X; dataset = X;
buildDataset(y); buildDataset(y);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble).to(device); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights, smoothing); return build(features, className, states, weights, smoothing);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples

View File

@@ -38,7 +38,6 @@ namespace bayesnet {
std::string dump_cpt() const override; std::string dump_cpt() const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
protected: protected:
torch::Device device;
bool fitted; bool fitted;
unsigned int m, n; // m: number of samples, n: number of features unsigned int m, n; // m: number of samples, n: number of features
Network model; Network model;

View File

@@ -97,7 +97,7 @@ namespace bayesnet {
dimensions.push_back(numStates); dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
// Create a tensor of zeros with the dimensions of the CPT // Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kDouble).to(device) + smoothing; cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
// Fill table with counts // Fill table with counts
auto pos = find(features.begin(), features.end(), name); auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) { if (pos == features.end()) {

View File

@@ -7,7 +7,6 @@
#include <ArffFiles.hpp> #include <ArffFiles.hpp>
#include <CPPFImdlp.h> #include <CPPFImdlp.h>
#include <bayesnet/ensembles/BoostAODE.h> #include <bayesnet/ensembles/BoostAODE.h>
#include <torch/torch.h>
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y) std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{ {
@@ -20,8 +19,7 @@ std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, m
} }
return Xd; return Xd;
} }
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last, torch::Device device)
{ {
auto handler = ArffFiles(); auto handler = ArffFiles();
handler.load(name, class_last); handler.load(name, class_last);
@@ -36,16 +34,16 @@ tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<s
torch::Tensor Xd; torch::Tensor Xd;
auto states = map<std::string, std::vector<int>>(); auto states = map<std::string, std::vector<int>>();
auto Xr = discretizeDataset(X, y); auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32).to(device); Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1); states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
auto item = states.at(features[i]); auto item = states.at(features[i]);
iota(begin(item), end(item), 0); iota(begin(item), end(item), 0);
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32).to(device)); Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
} }
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1); states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0); iota(begin(states.at(className)), end(states.at(className)), 0);
return { Xd, torch::tensor(y, torch::kInt32).to(device), features, className, states }; return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
} }
int main(int argc, char* argv[]) int main(int argc, char* argv[])
@@ -55,22 +53,16 @@ int main(int argc, char* argv[])
return 1; return 1;
} }
std::string file_name = argv[1]; std::string file_name = argv[1];
torch::Device device(torch::kCPU);
if (torch::cuda::is_available()) {
device = torch::Device(torch::kCUDA);
std::cout << "CUDA is available! Using GPU." << std::endl;
} else {
std::cout << "CUDA is not available. Using CPU." << std::endl;
}
torch::Tensor X, y; torch::Tensor X, y;
std::vector<std::string> features; std::vector<std::string> features;
std::string className; std::string className;
map<std::string, std::vector<int>> states; map<std::string, std::vector<int>> states;
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
std::cout << "Library version: " << clf.getVersion() << std::endl; std::cout << "Library version: " << clf.getVersion() << std::endl;
tie(X, y, features, className, states) = loadDataset(file_name, true, device); tie(X, y, features, className, states) = loadDataset(file_name, true);
clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE); clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
auto score = clf.score(X, y); auto score = clf.score(X, y);
std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl; std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
return 0; return 0;
} }