Merge pull request 'smoothing' (#30) from smoothing into main

Reviewed-on: #30
This commit is contained in:
Ricardo Montañana Gómez 2024-09-12 20:28:33 +00:00
commit d0955d9369
44 changed files with 534 additions and 314 deletions

6
.vscode/launch.json vendored
View File

@ -14,11 +14,11 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "test", "name": "test",
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet", "program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
"args": [ "args": [
"[Node]" "[Network]"
], ],
"cwd": "${workspaceFolder}/build_debug/tests" "cwd": "${workspaceFolder}/build_Debug/tests"
}, },
{ {
"name": "(gdb) Launch", "name": "(gdb) Launch",

View File

@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add tests to check the correct version of the mdlp, folding and json libraries. - Add tests to check the correct version of the mdlp, folding and json libraries.
- Library documentation generated with Doxygen. - Library documentation generated with Doxygen.
- Link to documentation in the README.md. - Link to documentation in the README.md.
- Three types of smoothing the Bayesian Network OLD_LAPLACE, LAPLACE and CESTNIK.
### Internal ### Internal

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(BayesNet project(BayesNet
VERSION 1.0.5.1 VERSION 1.0.6
DESCRIPTION "Bayesian Network and basic classifiers Library." DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX LANGUAGES CXX
@ -26,7 +26,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
endif() endif()

View File

@ -12,7 +12,6 @@ plantuml = plantuml
lcov = lcov lcov = lcov
genhtml = genhtml genhtml = genhtml
dot = dot dot = dot
n_procs = -j 16
docsrcdir = docs/manual docsrcdir = docs/manual
mansrcdir = docs/man3 mansrcdir = docs/man3
mandestdir = /usr/local/share/man mandestdir = /usr/local/share/man
@ -59,10 +58,10 @@ diagrams: ## Create a UML class diagram & dependency of the project (diagrams/
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg @$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
buildd: ## Build the debug targets buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) $(n_procs) cmake --build $(f_debug) -t $(app_targets) --parallel
buildr: ## Build the release targets buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) $(n_procs) cmake --build $(f_release) -t $(app_targets) --parallel
clean: ## Clean the tests info clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests..."; @echo ">>> Cleaning Debug BayesNet tests...";
@ -106,7 +105,7 @@ opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet tests..."; @echo ">>> Running BayesNet tests...";
@$(MAKE) clean @$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) $(n_procs) @cmake --build $(f_debug) -t $(test_targets) --parallel
@for t in $(test_targets); do \ @for t in $(test_targets); do \
echo ">>> Running $$t...";\ echo ">>> Running $$t...";\
if [ -f $(f_debug)/tests/$$t ]; then \ if [ -f $(f_debug)/tests/$$t ]; then \
@ -119,7 +118,7 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
coverage: ## Run tests and generate coverage report (build/index.html) coverage: ## Run tests and generate coverage report (build/index.html)
@echo ">>> Building tests with coverage..." @echo ">>> Building tests with coverage..."
@which $(lcov) || (echo ">>> Please install lcov"; exit 1) @which $(lcov) || (echo ">>> Please install lcov"; exit 1)
@if [ ! -f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi @if [ ! -f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi
@echo ">>> Building report..." @echo ">>> Building report..."
@cd $(f_debug)/tests; \ @cd $(f_debug)/tests; \

View File

@ -7,7 +7,7 @@
[![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
[![Coverage Badge](https://img.shields.io/badge/Coverage-97,3%25-green)](html/index.html) [![Coverage Badge](https://img.shields.io/badge/Coverage-97,1%25-green)](html/index.html)
Bayesian Network Classifiers using libtorch from scratch Bayesian Network Classifiers using libtorch from scratch

View File

@ -8,16 +8,18 @@
#include <vector> #include <vector>
#include <torch/torch.h> #include <torch/torch.h>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "bayesnet/network/Network.h"
namespace bayesnet { namespace bayesnet {
enum status_t { NORMAL, WARNING, ERROR }; enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier { class BaseClassifier {
public: public:
// X is nxm std::vector, y is nx1 std::vector // X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
// X is nxm tensor, y is nx1 tensor // X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0;
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0; std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
@ -39,7 +41,7 @@ namespace bayesnet {
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; } std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
protected: protected:
virtual void trainModel(const torch::Tensor& weights) = 0; virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
std::vector<std::string> validHyperparameters; std::vector<std::string> validHyperparameters;
}; };
} }

View File

@ -1,5 +1,5 @@
include_directories( include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp ${BayesNet_SOURCE_DIR}/lib/mdlp/src
${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR} ${BayesNet_SOURCE_DIR}

View File

@ -11,7 +11,7 @@
namespace bayesnet { namespace bayesnet {
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
this->features = features; this->features = features;
this->className = className; this->className = className;
@ -23,7 +23,7 @@ namespace bayesnet {
metrics = Metrics(dataset, features, className, n_classes); metrics = Metrics(dataset, features, className, n_classes);
model.initialize(); model.initialize();
buildModel(weights); buildModel(weights);
trainModel(weights); trainModel(weights, smoothing);
fitted = true; fitted = true;
return *this; return *this;
} }
@ -41,20 +41,20 @@ namespace bayesnet {
throw std::runtime_error(oss.str()); throw std::runtime_error(oss.str());
} }
} }
void Classifier::trainModel(const torch::Tensor& weights) void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
{ {
model.fit(dataset, weights, features, className, states); model.fit(dataset, weights, features, className, states, smoothing);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
dataset = X; dataset = X;
buildDataset(y); buildDataset(y);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32); dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
for (int i = 0; i < X.size(); ++i) { for (int i = 0; i < X.size(); ++i) {
@ -63,18 +63,18 @@ namespace bayesnet {
auto ytmp = torch::tensor(y, torch::kInt32); auto ytmp = torch::tensor(y, torch::kInt32);
buildDataset(ytmp); buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
this->dataset = dataset; this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
this->dataset = dataset; this->dataset = dataset;
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
void Classifier::checkFitParameters() void Classifier::checkFitParameters()
{ {

View File

@ -8,7 +8,6 @@
#define CLASSIFIER_H #define CLASSIFIER_H
#include <torch/torch.h> #include <torch/torch.h>
#include "bayesnet/utils/BayesMetrics.h" #include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/network/Network.h"
#include "bayesnet/BaseClassifier.h" #include "bayesnet/BaseClassifier.h"
namespace bayesnet { namespace bayesnet {
@ -16,10 +15,10 @@ namespace bayesnet {
public: public:
Classifier(Network model); Classifier(Network model);
virtual ~Classifier() = default; virtual ~Classifier() = default;
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override; Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
void addNodes(); void addNodes();
int getNumberOfNodes() const override; int getNumberOfNodes() const override;
int getNumberOfEdges() const override; int getNumberOfEdges() const override;
@ -51,10 +50,10 @@ namespace bayesnet {
std::vector<std::string> notes; // Used to store messages occurred during the fit process std::vector<std::string> notes; // Used to store messages occurred during the fit process
void checkFitParameters(); void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
void buildDataset(torch::Tensor& y); void buildDataset(torch::Tensor& y);
private: private:
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights); Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
}; };
} }
#endif #endif

View File

@ -8,7 +8,7 @@
namespace bayesnet { namespace bayesnet {
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
@ -19,7 +19,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(dataset, features, className, states); KDB::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }

View File

@ -15,7 +15,7 @@ namespace bayesnet {
public: public:
explicit KDBLd(int k); explicit KDBLd(int k);
virtual ~KDBLd() = default; virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override; std::vector<std::string> graph(const std::string& name = "KDB") const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };

View File

@ -70,7 +70,7 @@ namespace bayesnet {
states[pFeatures[index]] = xStates; states[pFeatures[index]] = xStates;
} }
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
model.fit(pDataset, weights, pFeatures, pClassName, states); model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
} }
return states; return states;
} }

View File

@ -8,25 +8,25 @@
namespace bayesnet { namespace bayesnet {
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
Xf = X_; Xf = X_;
y = y_; y = y_;
return commonFit(features_, className_, states_); return commonFit(features_, className_, states_, smoothing);
} }
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
if (!torch::is_floating_point(dataset)) { if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor"); throw std::runtime_error("Dataset must be a floating point tensor");
} }
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
return commonFit(features_, className_, states_); return commonFit(features_, className_, states_, smoothing);
} }
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
features = features_; features = features_;
className = className_; className = className_;
@ -34,7 +34,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states); SPODE::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }

View File

@ -14,10 +14,10 @@ namespace bayesnet {
public: public:
explicit SPODELd(int root); explicit SPODELd(int root);
virtual ~SPODELd() = default; virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states); SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<std::string> graph(const std::string& name = "SPODE") const override; std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };
}; };

View File

@ -8,7 +8,7 @@
namespace bayesnet { namespace bayesnet {
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
@ -19,7 +19,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(dataset, features, className, states); TAN::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;

View File

@ -15,10 +15,9 @@ namespace bayesnet {
public: public:
TANLd(); TANLd();
virtual ~TANLd() = default; virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "TAN") const override; std::vector<std::string> graph(const std::string& name = "TANLd") const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !TANLD_H #endif // !TANLD_H

View File

@ -10,7 +10,7 @@ namespace bayesnet {
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className) AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
{ {
} }
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
@ -21,7 +21,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
Ensemble::fit(dataset, features, className, states); Ensemble::fit(dataset, features, className, states, smoothing);
return *this; return *this;
} }
@ -34,10 +34,10 @@ namespace bayesnet {
n_models = models.size(); n_models = models.size();
significanceModels = std::vector<double>(n_models, 1.0); significanceModels = std::vector<double>(n_models, 1.0);
} }
void AODELd::trainModel(const torch::Tensor& weights) void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
for (const auto& model : models) { for (const auto& model : models) {
model->fit(Xf, y, features, className, states); model->fit(Xf, y, features, className, states, smoothing);
} }
} }
std::vector<std::string> AODELd::graph(const std::string& name) const std::vector<std::string> AODELd::graph(const std::string& name) const

View File

@ -15,10 +15,10 @@ namespace bayesnet {
public: public:
AODELd(bool predict_voting = true); AODELd(bool predict_voting = true);
virtual ~AODELd() = default; virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override; AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "AODELd") const override; std::vector<std::string> graph(const std::string& name = "AODELd") const override;
protected: protected:
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
}; };
} }

View File

@ -19,7 +19,7 @@ namespace bayesnet {
BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting) BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting)
{ {
} }
std::vector<int> BoostA2DE::initializeModels() std::vector<int> BoostA2DE::initializeModels(const Smoothing_t smoothing)
{ {
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
std::vector<int> featuresSelected = featureSelection(weights_); std::vector<int> featuresSelected = featureSelection(weights_);
@ -32,7 +32,7 @@ namespace bayesnet {
for (int j = i + 1; j < featuresSelected.size(); j++) { for (int j = i + 1; j < featuresSelected.size(); j++) {
auto parents = { featuresSelected[i], featuresSelected[j] }; auto parents = { featuresSelected[i], featuresSelected[j] };
std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents); std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_, smoothing);
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(1.0); // They will be updated later in trainModel significanceModels.push_back(1.0); // They will be updated later in trainModel
n_models++; n_models++;
@ -41,7 +41,7 @@ namespace bayesnet {
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
return featuresSelected; return featuresSelected;
} }
void BoostA2DE::trainModel(const torch::Tensor& weights) void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
// //
// Logging setup // Logging setup
@ -58,7 +58,7 @@ namespace bayesnet {
bool finished = false; bool finished = false;
std::vector<int> featuresUsed; std::vector<int> featuresUsed;
if (selectFeatures) { if (selectFeatures) {
featuresUsed = initializeModels(); featuresUsed = initializeModels(smoothing);
auto ypred = predict(X_train); auto ypred = predict(X_train);
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
// Update significance of the models // Update significance of the models
@ -96,7 +96,7 @@ namespace bayesnet {
pairSelection.erase(pairSelection.begin()); pairSelection.erase(pairSelection.begin());
std::unique_ptr<Classifier> model; std::unique_ptr<Classifier> model;
model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second })); model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_, smoothing);
alpha_t = 0.0; alpha_t = 0.0;
if (!block_update) { if (!block_update) {
auto ypred = model->predict(X_train); auto ypred = model->predict(X_train);

View File

@ -17,9 +17,9 @@ namespace bayesnet {
virtual ~BoostA2DE() = default; virtual ~BoostA2DE() = default;
std::vector<std::string> graph(const std::string& title = "BoostA2DE") const override; std::vector<std::string> graph(const std::string& title = "BoostA2DE") const override;
protected: protected:
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
private: private:
std::vector<int> initializeModels(); std::vector<int> initializeModels(const Smoothing_t smoothing);
}; };
} }
#endif #endif

View File

@ -16,13 +16,13 @@ namespace bayesnet {
BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting) BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
{ {
} }
std::vector<int> BoostAODE::initializeModels() std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
{ {
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
std::vector<int> featuresSelected = featureSelection(weights_); std::vector<int> featuresSelected = featureSelection(weights_);
for (const int& feature : featuresSelected) { for (const int& feature : featuresSelected) {
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature); std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_, smoothing);
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(1.0); // They will be updated later in trainModel significanceModels.push_back(1.0); // They will be updated later in trainModel
n_models++; n_models++;
@ -30,7 +30,7 @@ namespace bayesnet {
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
return featuresSelected; return featuresSelected;
} }
void BoostAODE::trainModel(const torch::Tensor& weights) void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
// //
// Logging setup // Logging setup
@ -47,7 +47,7 @@ namespace bayesnet {
bool finished = false; bool finished = false;
std::vector<int> featuresUsed; std::vector<int> featuresUsed;
if (selectFeatures) { if (selectFeatures) {
featuresUsed = initializeModels(); featuresUsed = initializeModels(smoothing);
auto ypred = predict(X_train); auto ypred = predict(X_train);
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
// Update significance of the models // Update significance of the models
@ -89,7 +89,7 @@ namespace bayesnet {
featureSelection.erase(featureSelection.begin()); featureSelection.erase(featureSelection.begin());
std::unique_ptr<Classifier> model; std::unique_ptr<Classifier> model;
model = std::make_unique<SPODE>(feature); model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_, smoothing);
alpha_t = 0.0; alpha_t = 0.0;
if (!block_update) { if (!block_update) {
auto ypred = model->predict(X_train); auto ypred = model->predict(X_train);

View File

@ -18,9 +18,9 @@ namespace bayesnet {
virtual ~BoostAODE() = default; virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override; std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
protected: protected:
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
private: private:
std::vector<int> initializeModels(); std::vector<int> initializeModels(const Smoothing_t smoothing);
}; };
} }
#endif #endif

View File

@ -3,22 +3,21 @@
// SPDX-FileType: SOURCE // SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // ***************************************************************
#include "Ensemble.h" #include "Ensemble.h"
#include "bayesnet/utils/CountingSemaphore.h"
namespace bayesnet { namespace bayesnet {
Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting)
{ {
}; };
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
void Ensemble::trainModel(const torch::Tensor& weights) void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
n_models = models.size(); n_models = models.size();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
// fit with std::vectors // fit with std::vectors
models[i]->fit(dataset, features, className, states); models[i]->fit(dataset, features, className, states, smoothing);
} }
} }
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X) std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
@ -85,17 +84,9 @@ namespace bayesnet {
{ {
auto n_states = models[0]->getClassNumStates(); auto n_states = models[0]->getClassNumStates();
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32); torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() { auto ypredict = models[i]->predict_proba(X);
auto ypredict = models[i]->predict_proba(X); y_pred += ypredict * significanceModels[i];
std::lock_guard<std::mutex> lock(mtx);
y_pred += ypredict * significanceModels[i];
}));
}
for (auto& thread : threads) {
thread.join();
} }
auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
y_pred /= sum; y_pred /= sum;
@ -105,23 +96,15 @@ namespace bayesnet {
{ {
auto n_states = models[0]->getClassNumStates(); auto n_states = models[0]->getClassNumStates();
std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0)); std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() { auto ypredict = models[i]->predict_proba(X);
auto ypredict = models[i]->predict_proba(X); assert(ypredict.size() == y_pred.size());
assert(ypredict.size() == y_pred.size()); assert(ypredict[0].size() == y_pred[0].size());
assert(ypredict[0].size() == y_pred[0].size()); // Multiply each prediction by the significance of the model and then add it to the final prediction
std::lock_guard<std::mutex> lock(mtx); for (auto j = 0; j < ypredict.size(); ++j) {
// Multiply each prediction by the significance of the model and then add it to the final prediction std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
for (auto j = 0; j < ypredict.size(); ++j) { [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(), }
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
}
}));
}
for (auto& thread : threads) {
thread.join();
} }
auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
//Divide each element of the prediction by the sum of the significances //Divide each element of the prediction by the sum of the significances
@ -141,17 +124,9 @@ namespace bayesnet {
{ {
// Build a m x n_models tensor with the predictions of each model // Build a m x n_models tensor with the predictions of each model
torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() { auto ypredict = models[i]->predict(X);
auto ypredict = models[i]->predict(X); y_pred.index_put_({ "...", i }, ypredict);
std::lock_guard<std::mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict);
}));
}
for (auto& thread : threads) {
thread.join();
} }
return voting(y_pred); return voting(y_pred);
} }

View File

@ -46,7 +46,7 @@ namespace bayesnet {
unsigned n_models; unsigned n_models;
std::vector<std::unique_ptr<Classifier>> models; std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels; std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
bool predict_voting; bool predict_voting;
}; };
} }

View File

@ -5,20 +5,20 @@
// *************************************************************** // ***************************************************************
#include <thread> #include <thread>
#include <mutex>
#include <sstream> #include <sstream>
#include <numeric>
#include <algorithm>
#include "Network.h" #include "Network.h"
#include "bayesnet/utils/bayesnetUtils.h" #include "bayesnet/utils/bayesnetUtils.h"
#include "bayesnet/utils/CountingSemaphore.h"
#include <pthread.h>
#include <fstream>
namespace bayesnet { namespace bayesnet {
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 } Network::Network() : fitted{ false }, classNumStates{ 0 }
{ {
} }
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 } Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
{ fitted(other.fitted), samples(other.samples)
}
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
{ {
if (samples.defined()) if (samples.defined())
samples = samples.clone(); samples = samples.clone();
@ -35,16 +35,15 @@ namespace bayesnet {
nodes.clear(); nodes.clear();
samples = torch::Tensor(); samples = torch::Tensor();
} }
float Network::getMaxThreads() const
{
return maxThreads;
}
torch::Tensor& Network::getSamples() torch::Tensor& Network::getSamples()
{ {
return samples; return samples;
} }
void Network::addNode(const std::string& name) void Network::addNode(const std::string& name)
{ {
if (fitted) {
throw std::invalid_argument("Cannot add node to a fitted network. Initialize first.");
}
if (name == "") { if (name == "") {
throw std::invalid_argument("Node name cannot be empty"); throw std::invalid_argument("Node name cannot be empty");
} }
@ -94,12 +93,21 @@ namespace bayesnet {
} }
void Network::addEdge(const std::string& parent, const std::string& child) void Network::addEdge(const std::string& parent, const std::string& child)
{ {
if (fitted) {
throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first.");
}
if (nodes.find(parent) == nodes.end()) { if (nodes.find(parent) == nodes.end()) {
throw std::invalid_argument("Parent node " + parent + " does not exist"); throw std::invalid_argument("Parent node " + parent + " does not exist");
} }
if (nodes.find(child) == nodes.end()) { if (nodes.find(child) == nodes.end()) {
throw std::invalid_argument("Child node " + child + " does not exist"); throw std::invalid_argument("Child node " + child + " does not exist");
} }
// Check if the edge is already in the graph
for (auto& node : nodes[parent]->getChildren()) {
if (node->getName() == child) {
throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists");
}
}
// Temporarily add edge to check for cycles // Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get()); nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get()); nodes[child]->addParent(nodes[parent].get());
@ -155,7 +163,7 @@ namespace bayesnet {
classNumStates = nodes.at(className)->getNumStates(); classNumStates = nodes.at(className)->getNumStates();
} }
// X comes in nxm, where n is the number of features and m the number of samples // X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states) void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className; this->className = className;
@ -164,17 +172,17 @@ namespace bayesnet {
for (int i = 0; i < featureNames.size(); ++i) { for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." }); auto row_feature = X.index({ i, "..." });
} }
completeFit(states, weights); completeFit(states, weights, smoothing);
} }
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states) void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className; this->className = className;
this->samples = samples; this->samples = samples;
completeFit(states, weights); completeFit(states, weights, smoothing);
} }
// input_data comes in nxm, where n is the number of features and m the number of samples // input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states) void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
@ -185,21 +193,57 @@ namespace bayesnet {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
} }
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights); completeFit(states, weights, smoothing);
} }
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
setStates(states); setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
std::vector<std::thread> threads; std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
const double n_samples = static_cast<double>(samples.size(1));
auto worker = [&](std::pair<const std::string, std::unique_ptr<Node>>& node, int i) {
std::string threadName = "FitWorker-" + std::to_string(i);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
double numStates = static_cast<double>(node.second->getNumStates());
double smoothing_factor = 0.0;
switch (smoothing) {
case Smoothing_t::ORIGINAL:
smoothing_factor = 1.0 / n_samples;
break;
case Smoothing_t::LAPLACE:
smoothing_factor = 1.0;
break;
case Smoothing_t::CESTNIK:
smoothing_factor = 1 / numStates;
break;
default:
throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast<int>(smoothing)));
}
node.second->computeCPT(samples, features, smoothing_factor, weights);
semaphore.release();
};
int i = 0;
for (auto& node : nodes) { for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() { semaphore.acquire();
node.second->computeCPT(samples, features, laplaceSmoothing, weights); threads.emplace_back(worker, std::ref(node), i++);
});
} }
for (auto& thread : threads) { for (auto& thread : threads) {
thread.join(); thread.join();
} }
// std::fstream file;
// file.open("cpt.txt", std::fstream::out | std::fstream::app);
// file << std::string(80, '*') << std::endl;
// for (const auto& item : graph("Test")) {
// file << item << std::endl;
// }
// file << std::string(80, '-') << std::endl;
// file << dump_cpt() << std::endl;
// file << std::string(80, '=') << std::endl;
// file.close();
fitted = true; fitted = true;
} }
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
@ -207,14 +251,38 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw std::logic_error("You must call fit() before calling predict()"); throw std::logic_error("You must call fit() before calling predict()");
} }
// Ensure the sample size is equal to the number of features
if (samples.size(0) != features.size() - 1) {
throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
torch::Tensor result; torch::Tensor result;
std::vector<std::thread> threads;
std::mutex mtx;
auto& semaphore = CountingSemaphore::getInstance();
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) { auto worker = [&](const torch::Tensor& sample, int i) {
const torch::Tensor sample = samples.index({ "...", i }); std::string threadName = "PredictWorker-" + std::to_string(i);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
auto psample = predict_sample(sample); auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64); auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); {
result.index_put_({ i, "..." }, temp); std::lock_guard<std::mutex> lock(mtx);
result.index_put_({ i, "..." }, temp);
}
semaphore.release();
};
for (int i = 0; i < samples.size(1); ++i) {
semaphore.acquire();
const torch::Tensor sample = samples.index({ "...", i });
threads.emplace_back(worker, sample, i);
}
for (auto& thread : threads) {
thread.join();
} }
if (proba) if (proba)
return result; return result;
@ -239,18 +307,38 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw std::logic_error("You must call fit() before calling predict()"); throw std::logic_error("You must call fit() before calling predict()");
} }
std::vector<int> predictions; // Ensure the sample size is equal to the number of features
if (tsamples.size() != features.size() - 1) {
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::vector<int> predictions(tsamples[0].size(), 0);
std::vector<int> sample; std::vector<int> sample;
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
auto worker = [&](const std::vector<int>& sample, const int row, int& prediction) {
std::string threadName = "(V)PWorker-" + std::to_string(row);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
auto classProbabilities = predict_sample(sample);
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
prediction = predictedClass;
semaphore.release();
};
for (int row = 0; row < tsamples[0].size(); ++row) { for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear(); sample.clear();
for (int col = 0; col < tsamples.size(); ++col) { for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]); sample.push_back(tsamples[col][row]);
} }
std::vector<double> classProbabilities = predict_sample(sample); semaphore.acquire();
// Find the class with the maximum posterior probability threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); }
int predictedClass = distance(classProbabilities.begin(), maxElem); for (auto& thread : threads) {
predictions.push_back(predictedClass); thread.join();
} }
return predictions; return predictions;
} }
@ -261,14 +349,36 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw std::logic_error("You must call fit() before calling predict_proba()"); throw std::logic_error("You must call fit() before calling predict_proba()");
} }
std::vector<std::vector<double>> predictions; // Ensure the sample size is equal to the number of features
if (tsamples.size() != features.size() - 1) {
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::vector<std::vector<double>> predictions(tsamples[0].size(), std::vector<double>(classNumStates, 0.0));
std::vector<int> sample; std::vector<int> sample;
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
auto worker = [&](const std::vector<int>& sample, int row, std::vector<double>& predictions) {
std::string threadName = "(V)PWorker-" + std::to_string(row);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
std::vector<double> classProbabilities = predict_sample(sample);
predictions = classProbabilities;
semaphore.release();
};
for (int row = 0; row < tsamples[0].size(); ++row) { for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear(); sample.clear();
for (int col = 0; col < tsamples.size(); ++col) { for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]); sample.push_back(tsamples[col][row]);
} }
predictions.push_back(predict_sample(sample)); semaphore.acquire();
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
}
for (auto& thread : threads) {
thread.join();
} }
return predictions; return predictions;
} }
@ -286,11 +396,6 @@ namespace bayesnet {
// Return 1xn std::vector of probabilities // Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const std::vector<int>& sample) std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{ {
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size() - 1) {
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::map<std::string, int> evidence; std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) { for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i]; evidence[features[i]] = sample[i];
@ -300,44 +405,26 @@ namespace bayesnet {
// Return 1xn std::vector of probabilities // Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const torch::Tensor& sample) std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{ {
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::map<std::string, int> evidence; std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) { for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>(); evidence[features[i]] = sample[i].item<int>();
} }
return exactInference(evidence); return exactInference(evidence);
} }
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
{
double result = 1.0;
for (auto& node : getNodes()) {
result *= node.second->getFactorValue(completeEvidence);
}
return result;
}
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence) std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{ {
std::vector<double> result(classNumStates, 0.0); std::vector<double> result(classNumStates, 0.0);
std::vector<std::thread> threads; auto completeEvidence = std::map<std::string, int>(evidence);
std::mutex mtx;
for (int i = 0; i < classNumStates; ++i) { for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() { completeEvidence[getClassName()] = i;
auto completeEvidence = std::map<std::string, int>(evidence); double partial = 1.0;
completeEvidence[getClassName()] = i; for (auto& node : getNodes()) {
double factor = computeFactor(completeEvidence); partial *= node.second->getFactorValue(completeEvidence);
std::lock_guard<std::mutex> lock(mtx); }
result[i] = factor; result[i] = partial;
});
}
for (auto& thread : threads) {
thread.join();
} }
// Normalize result // Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0); double sum = std::accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; }); transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result; return result;
} }

View File

@ -12,14 +12,18 @@
#include "Node.h" #include "Node.h"
namespace bayesnet { namespace bayesnet {
enum class Smoothing_t {
NONE = -1,
ORIGINAL = 0,
LAPLACE,
CESTNIK
};
class Network { class Network {
public: public:
Network(); Network();
explicit Network(float);
explicit Network(const Network&); explicit Network(const Network&);
~Network() = default; ~Network() = default;
torch::Tensor& getSamples(); torch::Tensor& getSamples();
float getMaxThreads() const;
void addNode(const std::string&); void addNode(const std::string&);
void addEdge(const std::string&, const std::string&); void addEdge(const std::string&, const std::string&);
std::map<std::string, std::unique_ptr<Node>>& getNodes(); std::map<std::string, std::unique_ptr<Node>>& getNodes();
@ -32,9 +36,9 @@ namespace bayesnet {
/* /*
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
*/ */
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states); void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states); void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states); void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
@ -50,19 +54,16 @@ namespace bayesnet {
private: private:
std::map<std::string, std::unique_ptr<Node>> nodes; std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted; bool fitted;
float maxThreads = 0.95;
int classNumStates; int classNumStates;
std::vector<std::string> features; // Including classname std::vector<std::string> features; // Including classname
std::string className; std::string className;
double laplaceSmoothing;
torch::Tensor samples; // n+1xm tensor used to fit the model torch::Tensor samples; // n+1xm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&); bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&); std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&); std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&); std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&); void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&); void setStates(const std::map<std::string, std::vector<int>>&);
}; };
} }

View File

@ -90,51 +90,54 @@ namespace bayesnet {
} }
return result; return result;
} }
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights) void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
{ {
dimensions.clear(); dimensions.clear();
// Get dimensions of the CPT // Get dimensions of the CPT
dimensions.push_back(numStates); dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
// Create a tensor of zeros with the dimensions of the CPT // Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing; cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
// Fill table with counts // Fill table with counts
auto pos = find(features.begin(), features.end(), name); auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) { if (pos == features.end()) {
throw std::logic_error("Feature " + name + " not found in dataset"); throw std::logic_error("Feature " + name + " not found in dataset");
} }
int name_index = pos - features.begin(); int name_index = pos - features.begin();
c10::List<c10::optional<at::Tensor>> coordinates;
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) { for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
c10::List<c10::optional<at::Tensor>> coordinates; coordinates.clear();
coordinates.push_back(dataset.index({ name_index, n_sample })); auto sample = dataset.index({ "...", n_sample });
coordinates.push_back(sample[name_index]);
for (auto parent : parents) { for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName()); pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) { if (pos == features.end()) {
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset"); throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
} }
int parent_index = pos - features.begin(); int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample })); coordinates.push_back(sample[parent_index]);
} }
// Increment the count of the corresponding coordinate // Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>()); cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
} }
// Normalize the counts // Normalize the counts
// Divide each row by the sum of the row
cpTable = cpTable / cpTable.sum(0); cpTable = cpTable / cpTable.sum(0);
} }
float Node::getFactorValue(std::map<std::string, int>& evidence) double Node::getFactorValue(std::map<std::string, int>& evidence)
{ {
c10::List<c10::optional<at::Tensor>> coordinates; c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h) // following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(at::tensor(evidence[name])); coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>(); return cpTable.index({ coordinates }).item<double>();
} }
std::vector<std::string> Node::graph(const std::string& className) std::vector<std::string> Node::graph(const std::string& className)
{ {
auto output = std::vector<std::string>(); auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : ""; auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n"); output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); }); transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; });
return output; return output;
} }
} }

View File

@ -23,12 +23,12 @@ namespace bayesnet {
std::vector<Node*>& getParents(); std::vector<Node*>& getParents();
std::vector<Node*>& getChildren(); std::vector<Node*>& getChildren();
torch::Tensor& getCPT(); torch::Tensor& getCPT();
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights); void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
int getNumStates() const; int getNumStates() const;
void setNumStates(int); void setNumStates(int);
unsigned minFill(); unsigned minFill();
std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
float getFactorValue(std::map<std::string, int>&); double getFactorValue(std::map<std::string, int>&);
private: private:
std::string name; std::string name;
std::vector<Node*> parents; std::vector<Node*> parents;

View File

@ -0,0 +1,46 @@
#ifndef COUNTING_SEMAPHORE_H
#define COUNTING_SEMAPHORE_H
#include <mutex>
#include <condition_variable>
#include <algorithm>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <thread>
// Process-wide counting semaphore used to bound the number of concurrently
// running worker threads. Meyers-singleton: the limit is fixed at construction
// to 95% of std::thread::hardware_concurrency(), with a floor of one slot.
class CountingSemaphore {
public:
    // Lazily-constructed, thread-safe singleton accessor (C++11 magic static).
    static CountingSemaphore& getInstance()
    {
        static CountingSemaphore instance;
        return instance;
    }
    // Delete copy constructor and assignment operator
    CountingSemaphore(const CountingSemaphore&) = delete;
    CountingSemaphore& operator=(const CountingSemaphore&) = delete;
    // Block until a slot is free, then take it.
    void acquire()
    {
        std::unique_lock<std::mutex> lock(mtx_);
        cv_.wait(lock, [this]() { return count_ > 0; });
        --count_;
    }
    // Give a slot back and wake one waiter.
    // The count is capped at max_count_ so an unmatched release() cannot
    // inflate the semaphore beyond its configured limit (the previous code
    // incremented unconditionally, letting count_ overflow the cap, and its
    // post-increment notify guard skipped the wake-up in exactly that case).
    void release()
    {
        std::lock_guard<std::mutex> lock(mtx_);
        if (count_ < max_count_) {
            ++count_;
            cv_.notify_one();
        }
    }
private:
    // Start with every slot available; `unsigned int` replaces the
    // non-standard POSIX `uint` typedef.
    CountingSemaphore()
        : max_count_(std::max(1u, static_cast<unsigned int>(0.95 * std::thread::hardware_concurrency()))),
        count_(max_count_)
    {
    }
    std::mutex mtx_;                 // guards count_
    std::condition_variable cv_;     // signaled when a slot is released
    const unsigned int max_count_;   // fixed capacity (>= 1)
    unsigned int count_;             // currently available slots, 0..max_count_
};
#endif

@ -1 +1 @@
Subproject commit 8c391e04fe4195d8be862c97f38cfe10e2a3472e Subproject commit 960b763ecd144f156d05ec61f577b04107290137

@ -1 +1 @@
Subproject commit 236d1b2f8be185039493fe7fce04a83e02ed72e5 Subproject commit 2db60e007d70da876379373c53b6421f281daeac

View File

@ -8,7 +8,7 @@ find_package(Torch REQUIRED)
find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED) find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED)
include_directories( include_directories(
lib/Files ../tests/lib/Files
lib/mdlp lib/mdlp
lib/json/include lib/json/include
/usr/local/include /usr/local/include

View File

@ -60,9 +60,9 @@ int main(int argc, char* argv[])
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
std::cout << "Library version: " << clf.getVersion() << std::endl; std::cout << "Library version: " << clf.getVersion() << std::endl;
tie(X, y, features, className, states) = loadDataset(file_name, true); tie(X, y, features, className, states) = loadDataset(file_name, true);
clf.fit(X, y, features, className, states); clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
auto score = clf.score(X, y); auto score = clf.score(X, y);
std::cout << "File: " << file_name << " score: " << score << std::endl; std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
return 0; return 0;
} }

View File

@ -2,7 +2,7 @@ if(ENABLE_TESTING)
include_directories( include_directories(
${BayesNet_SOURCE_DIR}/tests/lib/Files ${BayesNet_SOURCE_DIR}/tests/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/mdlp ${BayesNet_SOURCE_DIR}/lib/mdlp/src
${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR} ${BayesNet_SOURCE_DIR}
${CMAKE_BINARY_DIR}/configured_files/include ${CMAKE_BINARY_DIR}/configured_files/include

View File

@ -16,7 +16,7 @@ TEST_CASE("Fit and Score", "[A2DE]")
{ {
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::A2DE(); auto clf = bayesnet::A2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon)); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon));
REQUIRE(clf.getNumberOfNodes() == 360); REQUIRE(clf.getNumberOfNodes() == 360);
REQUIRE(clf.getNumberOfEdges() == 756); REQUIRE(clf.getNumberOfEdges() == 756);
@ -30,18 +30,18 @@ TEST_CASE("Test score with predict_voting", "[A2DE]")
{"predict_voting", true}, {"predict_voting", true},
}; };
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon)); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon));
hyperparameters["predict_voting"] = false; hyperparameters["predict_voting"] = false;
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon)); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon));
} }
TEST_CASE("Test graph", "[A2DE]") TEST_CASE("Test graph", "[A2DE]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = bayesnet::A2DE(); auto clf = bayesnet::A2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = clf.graph(); auto graph = clf.graph();
REQUIRE(graph.size() == 78); REQUIRE(graph.size() == 78);
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet A2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n"); REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet A2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");

View File

@ -18,38 +18,38 @@ TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
raw.yv.pop_back(); raw.yv.pop_back();
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::runtime_error); REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
} }
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]") TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
{ {
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto yshort = torch::zeros({ 149 }, torch::kInt32); auto yshort = torch::zeros({ 149 }, torch::kInt32);
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), std::runtime_error); REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
} }
TEST_CASE("Invalid data type", "[Classifier]") TEST_CASE("Invalid data type", "[Classifier]")
{ {
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", false); auto raw = RawDatasets("iris", false);
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states), "dataset (X, y) must be of type Integer"); REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "dataset (X, y) must be of type Integer");
} }
TEST_CASE("Invalid number of features", "[Classifier]") TEST_CASE("Invalid number of features", "[Classifier]")
{ {
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0); auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), "Classifier: X 5 and features 4 must have the same number of features"); REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "Classifier: X 5 and features 4 must have the same number of features");
} }
TEST_CASE("Invalid class name", "[Classifier]") TEST_CASE("Invalid class name", "[Classifier]")
{ {
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), std::invalid_argument); REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), "class name not found in states"); REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), "class name not found in states");
} }
TEST_CASE("Invalid feature name", "[Classifier]") TEST_CASE("Invalid feature name", "[Classifier]")
{ {
@ -57,8 +57,8 @@ TEST_CASE("Invalid feature name", "[Classifier]")
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto statest = raw.states; auto statest = raw.states;
statest.erase("petallength"); statest.erase("petallength");
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), std::invalid_argument); REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), "feature [petallength] not found in states"); REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), "feature [petallength] not found in states");
} }
TEST_CASE("Invalid hyperparameter", "[Classifier]") TEST_CASE("Invalid hyperparameter", "[Classifier]")
{ {
@ -71,7 +71,7 @@ TEST_CASE("Topological order", "[Classifier]")
{ {
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto order = model.topological_order(); auto order = model.topological_order();
REQUIRE(order.size() == 4); REQUIRE(order.size() == 4);
REQUIRE(order[0] == "petallength"); REQUIRE(order[0] == "petallength");
@ -83,7 +83,7 @@ TEST_CASE("Dump_cpt", "[Classifier]")
{ {
auto model = bayesnet::TAN(); auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto cpt = model.dump_cpt(); auto cpt = model.dump_cpt();
REQUIRE(cpt.size() == 1713); REQUIRE(cpt.size() == 1713);
} }
@ -111,7 +111,7 @@ TEST_CASE("KDB Graph", "[Classifier]")
{ {
auto model = bayesnet::KDB(2); auto model = bayesnet::KDB(2);
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = model.graph(); auto graph = model.graph();
REQUIRE(graph.size() == 15); REQUIRE(graph.size() == 15);
} }
@ -119,7 +119,7 @@ TEST_CASE("KDBLd Graph", "[Classifier]")
{ {
auto model = bayesnet::KDBLd(2); auto model = bayesnet::KDBLd(2);
auto raw = RawDatasets("iris", false); auto raw = RawDatasets("iris", false);
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = model.graph(); auto graph = model.graph();
REQUIRE(graph.size() == 15); REQUIRE(graph.size() == 15);
} }

View File

@ -18,7 +18,7 @@ TEST_CASE("Topological Order", "[Ensemble]")
{ {
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto order = clf.topological_order(); auto order = clf.topological_order();
REQUIRE(order.size() == 0); REQUIRE(order.size() == 0);
} }
@ -26,7 +26,7 @@ TEST_CASE("Dump CPT", "[Ensemble]")
{ {
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto dump = clf.dump_cpt(); auto dump = clf.dump_cpt();
REQUIRE(dump == ""); REQUIRE(dump == "");
} }
@ -34,7 +34,7 @@ TEST_CASE("Number of States", "[Ensemble]")
{ {
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfStates() == 76); REQUIRE(clf.getNumberOfStates() == 76);
} }
TEST_CASE("Show", "[Ensemble]") TEST_CASE("Show", "[Ensemble]")
@ -46,7 +46,7 @@ TEST_CASE("Show", "[Ensemble]")
{"maxTolerance", 1}, {"maxTolerance", 1},
{"convergence", false}, {"convergence", false},
}); });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::string> expected = { std::vector<std::string> expected = {
"class -> sepallength, sepalwidth, petallength, petalwidth, ", "class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> sepallength, sepalwidth, petalwidth, ", "petallength -> sepallength, sepalwidth, petalwidth, ",
@ -78,16 +78,16 @@ TEST_CASE("Graph", "[Ensemble]")
{ {
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = clf.graph(); auto graph = clf.graph();
REQUIRE(graph.size() == 56); REQUIRE(graph.size() == 56);
auto clf2 = bayesnet::AODE(); auto clf2 = bayesnet::AODE();
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
graph = clf2.graph(); graph = clf2.graph();
REQUIRE(graph.size() == 56); REQUIRE(graph.size() == 56);
raw = RawDatasets("glass", false); raw = RawDatasets("glass", false);
auto clf3 = bayesnet::AODELd(); auto clf3 = bayesnet::AODELd();
clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
graph = clf3.graph(); graph = clf3.graph();
REQUIRE(graph.size() == 261); REQUIRE(graph.size() == 261);
} }

View File

@ -20,7 +20,7 @@
#include "bayesnet/ensembles/BoostAODE.h" #include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h" #include "TestUtils.h"
const std::string ACTUAL_VERSION = "1.0.5.1"; const std::string ACTUAL_VERSION = "1.0.6";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{ {
@ -54,7 +54,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
auto clf = models[name]; auto clf = models[name];
auto discretize = name.substr(name.length() - 2) != "Ld"; auto discretize = name.substr(name.length() - 2) != "Ld";
auto raw = RawDatasets(file_name, discretize); auto raw = RawDatasets(file_name, discretize);
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf->score(raw.Xt, raw.yt); auto score = clf->score(raw.Xt, raw.yt);
INFO("Classifier: " << name << " File: " << file_name); INFO("Classifier: " << name << " File: " << file_name);
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
@ -81,7 +81,7 @@ TEST_CASE("Models features & Graph", "[Models]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = bayesnet::TAN(); auto clf = bayesnet::TAN();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.getNumberOfStates() == 19); REQUIRE(clf.getNumberOfStates() == 19);
@ -93,7 +93,7 @@ TEST_CASE("Models features & Graph", "[Models]")
{ {
auto clf = bayesnet::TANLd(); auto clf = bayesnet::TANLd();
auto raw = RawDatasets("iris", false); auto raw = RawDatasets("iris", false);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.getNumberOfStates() == 19); REQUIRE(clf.getNumberOfStates() == 19);
@ -106,7 +106,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = bayesnet::KDB(2); auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8); REQUIRE(clf.getNumberOfEdges() == 8);
} }
@ -166,7 +166,7 @@ TEST_CASE("Model predict_proba", "[Models]")
SECTION("Test " + model + " predict_proba") SECTION("Test " + model + " predict_proba")
{ {
auto clf = models[model]; auto clf = models[model];
clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto y_pred_proba = clf->predict_proba(raw.Xv); auto y_pred_proba = clf->predict_proba(raw.Xv);
auto yt_pred_proba = clf->predict_proba(raw.Xt); auto yt_pred_proba = clf->predict_proba(raw.Xt);
auto y_pred = clf->predict(raw.Xv); auto y_pred = clf->predict(raw.Xv);
@ -203,7 +203,7 @@ TEST_CASE("AODE voting-proba", "[Models]")
{ {
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::AODE(false); auto clf = bayesnet::AODE(false);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score_proba = clf.score(raw.Xv, raw.yv); auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv); auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({ clf.setHyperparameters({
@ -222,9 +222,9 @@ TEST_CASE("SPODELd dataset", "[Models]")
auto raw = RawDatasets("iris", false); auto raw = RawDatasets("iris", false);
auto clf = bayesnet::SPODELd(0); auto clf = bayesnet::SPODELd(0);
// raw.dataset.to(torch::kFloat32); // raw.dataset.to(torch::kFloat32);
clf.fit(raw.dataset, raw.features, raw.className, raw.states); clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xt, raw.yt); auto score = clf.score(raw.Xt, raw.yt);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto scoret = clf.score(raw.Xt, raw.yt); auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon)); REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
@ -233,13 +233,13 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
{ {
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::KDB(2); auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv); auto score = clf.score(raw.Xv, raw.yv);
clf.setHyperparameters({ clf.setHyperparameters({
{"k", 3}, {"k", 3},
{"theta", 0.7}, {"theta", 0.7},
}); });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto scoret = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
@ -248,7 +248,7 @@ TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = bayesnet::SPODELd(0); auto clf = bayesnet::SPODELd(0);
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states), std::runtime_error); REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
} }
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
{ {

View File

@ -15,6 +15,7 @@
#include "bayesnet/network/Node.h" #include "bayesnet/network/Node.h"
#include "bayesnet/utils/bayesnetUtils.h" #include "bayesnet/utils/bayesnetUtils.h"
const double threshold = 1e-4;
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className) void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
{ {
std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} }; std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} };
@ -29,13 +30,11 @@ void buildModel(bayesnet::Network& net, const std::vector<std::string>& features
net.addEdge(className, feature); net.addEdge(className, feature);
} }
} }
TEST_CASE("Test Bayesian Network", "[Network]") TEST_CASE("Test Bayesian Network", "[Network]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto net = bayesnet::Network(); auto net = bayesnet::Network();
double threshold = 1e-4;
SECTION("Test get features") SECTION("Test get features")
{ {
@ -115,9 +114,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
REQUIRE(children == children3); REQUIRE(children == children3);
} }
// Fit networks // Fit networks
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states); net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states); net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(net.getStates() == net2.getStates()); REQUIRE(net.getStates() == net2.getStates());
REQUIRE(net.getStates() == net3.getStates()); REQUIRE(net.getStates() == net3.getStates());
REQUIRE(net.getFeatures() == net2.getFeatures()); REQUIRE(net.getFeatures() == net2.getFeatures());
@ -150,6 +149,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test show") SECTION("Test show")
{ {
INFO("Test show");
net.addNode("A"); net.addNode("A");
net.addNode("B"); net.addNode("B");
net.addNode("C"); net.addNode("C");
@ -163,6 +163,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test topological_sort") SECTION("Test topological_sort")
{ {
INFO("Test topological sort");
net.addNode("A"); net.addNode("A");
net.addNode("B"); net.addNode("B");
net.addNode("C"); net.addNode("C");
@ -176,6 +177,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test graph") SECTION("Test graph")
{ {
INFO("Test graph");
net.addNode("A"); net.addNode("A");
net.addNode("B"); net.addNode("B");
net.addNode("C"); net.addNode("C");
@ -193,8 +195,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test predict") SECTION("Test predict")
{ {
INFO("Test predict");
buildModel(net, raw.features, raw.className); buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<int> y_test = { 2, 2, 0, 2, 1 }; std::vector<int> y_test = { 2, 2, 0, 2, 1 };
auto y_pred = net.predict(test); auto y_pred = net.predict(test);
@ -202,8 +205,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test predict_proba") SECTION("Test predict_proba")
{ {
INFO("Test predict_proba");
buildModel(net, raw.features, raw.className); buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<std::vector<double>> y_test = { std::vector<std::vector<double>> y_test = {
{0.450237, 0.0866621, 0.463101}, {0.450237, 0.0866621, 0.463101},
@ -223,15 +227,17 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test score") SECTION("Test score")
{ {
INFO("Test score");
buildModel(net, raw.features, raw.className); buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = net.score(raw.Xv, raw.yv); auto score = net.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold)); REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
} }
SECTION("Copy constructor") SECTION("Copy constructor")
{ {
INFO("Test copy constructor");
buildModel(net, raw.features, raw.className); buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
auto net2 = bayesnet::Network(net); auto net2 = bayesnet::Network(net);
REQUIRE(net.getFeatures() == net2.getFeatures()); REQUIRE(net.getFeatures() == net2.getFeatures());
REQUIRE(net.getEdges() == net2.getEdges()); REQUIRE(net.getEdges() == net2.getEdges());
@ -253,6 +259,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
} }
SECTION("Test oddities") SECTION("Test oddities")
{ {
INFO("Test oddities");
buildModel(net, raw.features, raw.className); buildModel(net, raw.features, raw.className);
// predict without fitting // predict without fitting
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
@ -268,27 +275,27 @@ TEST_CASE("Test Bayesian Network", "[Network]")
// predict with wrong data // predict with wrong data
auto netx = bayesnet::Network(); auto netx = bayesnet::Network();
buildModel(netx, raw.features, raw.className); buildModel(netx, raw.features, raw.className);
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} }; std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} };
auto test_tensor2 = bayesnet::vectorToTensor(test2, false); auto test_tensor2 = bayesnet::vectorToTensor(test2, false);
REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error); REQUIRE_THROWS_AS(netx.predict(test2), std::invalid_argument);
REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)"); REQUIRE_THROWS_WITH(netx.predict(test2), "(V) Sample size (3) does not match the number of features (4)");
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error); REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::invalid_argument);
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)"); REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "(T) Sample size (3) does not match the number of features (4)");
// fit with wrong data // fit with wrong data
// Weights // Weights
auto net2 = bayesnet::Network(); auto net2 = bayesnet::Network();
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit"; std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit";
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states), invalid_weights); REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), invalid_weights);
// X & y // X & y
std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)"; std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)";
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states), invalid_labels); REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), invalid_labels);
// Features // Features
std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)"; std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)";
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states), invalid_features); REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), invalid_features);
// Different number of features // Different number of features
auto net3 = bayesnet::Network(); auto net3 = bayesnet::Network();
auto test2y = { 1, 2, 3, 4, 5 }; auto test2y = { 1, 2, 3, 4, 5 };
@ -296,23 +303,23 @@ TEST_CASE("Test Bayesian Network", "[Network]")
auto features3 = raw.features; auto features3 = raw.features;
features3.pop_back(); features3.pop_back();
std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)"; std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)";
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states), invalid_features2); REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), invalid_features2);
// Uninitialized network // Uninitialized network
std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()"; std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()";
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), network_invalid); REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), network_invalid);
// Classname // Classname
std::string invalid_classname = "Class Name not found in Network::features"; std::string invalid_classname = "Class Name not found in Network::features";
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), invalid_classname); REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), invalid_classname);
// Invalid feature // Invalid feature
auto features2 = raw.features; auto features2 = raw.features;
features2.pop_back(); features2.pop_back();
features2.push_back("duck"); features2.push_back("duck");
std::string invalid_feature = "Feature duck not found in Network::features"; std::string invalid_feature = "Feature duck not found in Network::features";
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), invalid_feature); REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), invalid_feature);
// Add twice the same node name to the network => Nothing should happen // Add twice the same node name to the network => Nothing should happen
net.addNode("A"); net.addNode("A");
net.addNode("A"); net.addNode("A");
@ -320,8 +327,8 @@ TEST_CASE("Test Bayesian Network", "[Network]")
auto net4 = bayesnet::Network(); auto net4 = bayesnet::Network();
buildModel(net4, raw.features, raw.className); buildModel(net4, raw.features, raw.className);
std::string invalid_state = "Feature sepallength not found in states"; std::string invalid_state = "Feature sepallength not found in states";
REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>()), std::invalid_argument); REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>()), invalid_state); REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), invalid_state);
} }
} }
@ -342,15 +349,6 @@ TEST_CASE("Cicle in Network", "[Network]")
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument); REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph."); REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph.");
} }
TEST_CASE("Test max threads constructor", "[Network]")
{
auto net = bayesnet::Network();
REQUIRE(net.getMaxThreads() == 0.95f);
auto net2 = bayesnet::Network(4);
REQUIRE(net2.getMaxThreads() == 4);
auto net3 = bayesnet::Network(1.75);
REQUIRE(net3.getMaxThreads() == 1.75);
}
TEST_CASE("Edges troubles", "[Network]") TEST_CASE("Edges troubles", "[Network]")
{ {
auto net = bayesnet::Network(); auto net = bayesnet::Network();
@ -360,13 +358,16 @@ TEST_CASE("Edges troubles", "[Network]")
REQUIRE_THROWS_WITH(net.addEdge("A", "C"), "Child node C does not exist"); REQUIRE_THROWS_WITH(net.addEdge("A", "C"), "Child node C does not exist");
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument); REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Parent node C does not exist"); REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Parent node C does not exist");
net.addEdge("A", "B");
REQUIRE_THROWS_AS(net.addEdge("A", "B"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("A", "B"), "Edge A -> B already exists");
} }
TEST_CASE("Dump CPT", "[Network]") TEST_CASE("Dump CPT", "[Network]")
{ {
auto net = bayesnet::Network(); auto net = bayesnet::Network();
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
buildModel(net, raw.features, raw.className); buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
auto res = net.dump_cpt(); auto res = net.dump_cpt();
std::string expected = R"(* class: (3) : [3] std::string expected = R"(* class: (3) : [3]
0.3333 0.3333
@ -459,3 +460,108 @@ TEST_CASE("Dump CPT", "[Network]")
REQUIRE(res == expected); REQUIRE(res == expected);
} }
TEST_CASE("Test Smoothing A", "[Network]")
{
    /*
     Cestnik (m-estimate) smoothing worked example, taking m = 1 and prior Pa = 0.5.
     Computing P(A | C), with C in {0, 1, 2} and the samples:
     AC = { 11, 12, 11, 10, 10, 12, 10, 01, 00, 02 }
     Then:
     P(A = 1 | C = 0) = (3 + 1/2 * 1) / (4 + 1) = 3.5 / 5
     P(A = 0 | C = 0) = (1 + 1/2 * 1) / (4 + 1) = 1.5 / 5
     Here the denominator term is the number of occurrences of C = 0 (the
     conditioning value) and the prior is again over A, the variable whose
     conditional marginals are being computed.
     P(A = 1 | C = 1) = (2 + 1/2 * 1) / (3 + 1) = 2.5 / 4
     P(A = 0 | C = 1) = (1 + 1/2 * 1) / (3 + 1) = 1.5 / 4
     P(A = 1 | C = 2) = (2 + 1/2 * 1) / (3 + 1) = 2.5 / 4
     P(A = 0 | C = 2) = (1 + 1/2 * 1) / (3 + 1) = 1.5 / 4
     It is in fact similar to Laplace smoothing, which for C = 0 would give:
     P(A = 1 | C = 0) = (3 + 1) / (4 + 2) = 4 / 6
     P(A = 0 | C = 0) = (1 + 1) / (4 + 2) = 2 / 6
    */
    // Build the minimal network C -> A used by the hand computation above.
    auto net = bayesnet::Network();
    net.addNode("A");
    net.addNode("C");
    net.addEdge("C", "A");
    std::vector<int> C = { 1, 2, 1, 0, 0, 2, 0, 1, 0, 2 };
    std::vector<std::vector<int>> A = { { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } };
    std::map<std::string, std::vector<int>> states = { { "A", {0, 1} }, { "C", {0, 1, 2} } };
    auto weights = std::vector<double>(C.size(), 1);
    //
    // Laplace smoothing: compare the fitted CPTs against hand-computed add-one estimates.
    //
    net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
    auto cpt_c_laplace = net.getNodes().at("C")->getCPT();
    REQUIRE(cpt_c_laplace.size(0) == 3);
    auto laplace_c = std::vector<float>({ 0.3846, 0.3077, 0.3077 });
    for (int i = 0; i < static_cast<int>(laplace_c.size()); ++i) {
        REQUIRE(cpt_c_laplace.index({ i }).item<float>() == Catch::Approx(laplace_c[i]).margin(threshold));
    }
    auto cpt_a_laplace = net.getNodes().at("A")->getCPT();
    REQUIRE(cpt_a_laplace.size(0) == 2);
    REQUIRE(cpt_a_laplace.size(1) == 3);
    auto laplace_a = std::vector<std::vector<float>>({ {0.3333, 0.4000, 0.4000}, {0.6667, 0.6000, 0.6000} });
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j) {
            REQUIRE(cpt_a_laplace.index({ i, j }).item<float>() == Catch::Approx(laplace_a[i][j]).margin(threshold));
        }
    }
    //
    // Cestnik (m-estimate) smoothing: compare against the hand computation in the header comment.
    //
    net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
    auto cpt_c_cestnik = net.getNodes().at("C")->getCPT();
    REQUIRE(cpt_c_cestnik.size(0) == 3);
    auto cestnik_c = std::vector<float>({ 0.3939, 0.3030, 0.3030 });
    // Fixed: the loop bound previously iterated over laplace_c.size() (copy-paste slip);
    // it must track the vector actually being verified.
    for (int i = 0; i < static_cast<int>(cestnik_c.size()); ++i) {
        REQUIRE(cpt_c_cestnik.index({ i }).item<float>() == Catch::Approx(cestnik_c[i]).margin(threshold));
    }
    auto cpt_a_cestnik = net.getNodes().at("A")->getCPT();
    REQUIRE(cpt_a_cestnik.size(0) == 2);
    REQUIRE(cpt_a_cestnik.size(1) == 3);
    auto cestnik_a = std::vector<std::vector<float>>({ {0.3000, 0.3750, 0.3750}, {0.7000, 0.6250, 0.6250} });
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j) {
            REQUIRE(cpt_a_cestnik.index({ i, j }).item<float>() == Catch::Approx(cestnik_a[i][j]).margin(threshold));
        }
    }
}
// Exercises the three smoothing strategies (LAPLACE, ORIGINAL, CESTNIK) on a
// small hand-built network (C is parent of X, Y, Z; Y is also parent of Z),
// dumping the fitted CPTs and one predict_proba call for each strategy.
// NOTE(review): this test only prints to stdout and contains no REQUIRE
// assertions — it verifies nothing beyond "fit/predict_proba do not throw".
// Consider pinning expected CPT values as done in "Test Smoothing A".
TEST_CASE("Test Smoothing B", "[Network]")
{
auto net = bayesnet::Network();
// Nodes: three features plus the class.
net.addNode("X");
net.addNode("Y");
net.addNode("Z");
net.addNode("C");
// Structure: C -> X, C -> Y, C -> Z and additionally Y -> Z.
net.addEdge("C", "X");
net.addEdge("C", "Y");
net.addEdge("C", "Z");
net.addEdge("Y", "Z");
// 18 samples; Data rows are the X, Y and Z values, C holds the class labels.
std::vector<int> C = { 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1 };
std::vector<std::vector<int>> Data = {
{ 0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0},
{ 1,2,0,2,2,2,1,0,0,1,1,1,0,1,2,1,0,2},
{ 2,1,3,3,2,0,0,1,3,2,1,2,2,3,0,0,1,2}
};
// Cardinalities: X is binary, Y ternary, Z has four states, C is binary.
std::map<std::string, std::vector<int>> states = {
{ "X", {0, 1} },
{ "Y", {0, 1, 2} },
{ "Z", {0, 1, 2, 3} },
{ "C", {0, 1} }
};
// Uniform sample weights.
auto weights = std::vector<double>(C.size(), 1);
// Laplace smoothing
std::cout << "LAPLACE\n";
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
std::cout << net.dump_cpt();
std::cout << "Predict proba of {0, 1, 2} y {1, 2, 3} = " << net.predict_proba({ {0, 1}, {1, 2}, {2, 3} }) << std::endl;
// Original (legacy) smoothing
std::cout << "ORIGINAL\n";
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::ORIGINAL);
std::cout << net.dump_cpt();
std::cout << "Predict proba of {0, 1, 2} y {1, 2, 3} = " << net.predict_proba({ {0, 1}, {1, 2}, {2, 3} }) << std::endl;
// Cestnik (m-estimate) smoothing
std::cout << "CESTNIK\n";
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
std::cout << net.dump_cpt();
std::cout << "Predict proba of {0, 1, 2} y {1, 2, 3} = " << net.predict_proba({ {0, 1}, {1, 2}, {2, 3} }) << std::endl;
}

View File

@ -17,7 +17,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
{ {
auto raw = RawDatasets("diabetes", true); auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostA2DE(); auto clf = bayesnet::BoostA2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 342); REQUIRE(clf.getNumberOfNodes() == 342);
REQUIRE(clf.getNumberOfEdges() == 684); REQUIRE(clf.getNumberOfEdges() == 684);
REQUIRE(clf.getNotes().size() == 3); REQUIRE(clf.getNotes().size() == 3);
@ -32,7 +32,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// auto raw = RawDatasets("glass", true); // auto raw = RawDatasets("glass", true);
// auto clf = bayesnet::BoostAODE(); // auto clf = bayesnet::BoostAODE();
// clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); // clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); // clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
// REQUIRE(clf.getNumberOfNodes() == 90); // REQUIRE(clf.getNumberOfNodes() == 90);
// REQUIRE(clf.getNumberOfEdges() == 153); // REQUIRE(clf.getNumberOfEdges() == 153);
// REQUIRE(clf.getNotes().size() == 2); // REQUIRE(clf.getNotes().size() == 2);
@ -44,7 +44,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// auto raw = RawDatasets("glass", true); // auto raw = RawDatasets("glass", true);
// auto clf = bayesnet::BoostAODE(); // auto clf = bayesnet::BoostAODE();
// clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); // clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); // clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
// REQUIRE(clf.getNumberOfNodes() == 90); // REQUIRE(clf.getNumberOfNodes() == 90);
// REQUIRE(clf.getNumberOfEdges() == 153); // REQUIRE(clf.getNumberOfEdges() == 153);
// REQUIRE(clf.getNotes().size() == 2); // REQUIRE(clf.getNotes().size() == 2);
@ -60,7 +60,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// {"convergence", true}, // {"convergence", true},
// {"select_features","CFS"}, // {"select_features","CFS"},
// }); // });
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); // clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
// REQUIRE(clf.getNumberOfNodes() == 72); // REQUIRE(clf.getNumberOfNodes() == 72);
// REQUIRE(clf.getNumberOfEdges() == 120); // REQUIRE(clf.getNumberOfEdges() == 120);
// REQUIRE(clf.getNotes().size() == 2); // REQUIRE(clf.getNotes().size() == 2);
@ -75,7 +75,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// { // {
// auto raw = RawDatasets("iris", true); // auto raw = RawDatasets("iris", true);
// auto clf = bayesnet::BoostAODE(false); // auto clf = bayesnet::BoostAODE(false);
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); // clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
// auto score_proba = clf.score(raw.Xv, raw.yv); // auto score_proba = clf.score(raw.Xv, raw.yv);
// auto pred_proba = clf.predict_proba(raw.Xv); // auto pred_proba = clf.predict_proba(raw.Xv);
// clf.setHyperparameters({ // clf.setHyperparameters({
@ -104,7 +104,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// {"maxTolerance", 1}, // {"maxTolerance", 1},
// {"convergence", false}, // {"convergence", false},
// }); // });
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); // clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
// auto score = clf.score(raw.Xv, raw.yv); // auto score = clf.score(raw.Xv, raw.yv);
// auto scoret = clf.score(raw.Xt, raw.yt); // auto scoret = clf.score(raw.Xt, raw.yt);
// INFO("BoostAODE order: " + order); // INFO("BoostAODE order: " + order);
@ -136,7 +136,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// for (const auto& hyper : bad_hyper_fit.items()) { // for (const auto& hyper : bad_hyper_fit.items()) {
// INFO("BoostAODE hyper: " + hyper.value().dump()); // INFO("BoostAODE hyper: " + hyper.value().dump());
// clf.setHyperparameters(hyper.value()); // clf.setHyperparameters(hyper.value());
// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument); // REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
// } // }
// } // }
@ -151,7 +151,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// {"block_update", false}, // {"block_update", false},
// {"convergence_best", false}, // {"convergence_best", false},
// }); // });
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); // clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
// REQUIRE(clf.getNumberOfNodes() == 210); // REQUIRE(clf.getNumberOfNodes() == 210);
// REQUIRE(clf.getNumberOfEdges() == 378); // REQUIRE(clf.getNumberOfEdges() == 378);
// REQUIRE(clf.getNotes().size() == 1); // REQUIRE(clf.getNotes().size() == 1);
@ -172,13 +172,13 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// {"convergence_best", true}, // {"convergence_best", true},
// }; // };
// clf.setHyperparameters(hyperparameters); // clf.setHyperparameters(hyperparameters);
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); // clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
// auto score_best = clf.score(raw.X_test, raw.y_test); // auto score_best = clf.score(raw.X_test, raw.y_test);
// REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon)); // REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
// // Now we will set the hyperparameter to use the last accuracy // // Now we will set the hyperparameter to use the last accuracy
// hyperparameters["convergence_best"] = false; // hyperparameters["convergence_best"] = false;
// clf.setHyperparameters(hyperparameters); // clf.setHyperparameters(hyperparameters);
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); // clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
// auto score_last = clf.score(raw.X_test, raw.y_test); // auto score_last = clf.score(raw.X_test, raw.y_test);
// REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); // REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
// } // }
@ -193,7 +193,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
// {"maxTolerance", 3}, // {"maxTolerance", 3},
// {"convergence", true}, // {"convergence", true},
// }); // });
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); // clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
// REQUIRE(clf.getNumberOfNodes() == 868); // REQUIRE(clf.getNumberOfNodes() == 868);
// REQUIRE(clf.getNumberOfEdges() == 1724); // REQUIRE(clf.getNumberOfEdges() == 1724);
// REQUIRE(clf.getNotes().size() == 3); // REQUIRE(clf.getNotes().size() == 3);

View File

@ -18,7 +18,7 @@ TEST_CASE("Feature_select CFS", "[BoostAODE]")
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "CFS"} }); clf.setHyperparameters({ {"select_features", "CFS"} });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2); REQUIRE(clf.getNotes().size() == 2);
@ -30,7 +30,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]")
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2); REQUIRE(clf.getNotes().size() == 2);
@ -42,7 +42,7 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]")
auto raw = RawDatasets("glass", true); auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2); REQUIRE(clf.getNotes().size() == 2);
@ -58,7 +58,7 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]")
{"convergence", true}, {"convergence", true},
{"select_features","CFS"}, {"select_features","CFS"},
}); });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 72); REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120); REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 2); REQUIRE(clf.getNotes().size() == 2);
@ -73,7 +73,7 @@ TEST_CASE("Voting vs proba", "[BoostAODE]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostAODE(false); auto clf = bayesnet::BoostAODE(false);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score_proba = clf.score(raw.Xv, raw.yv); auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv); auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({ clf.setHyperparameters({
@ -102,7 +102,7 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]")
{"maxTolerance", 1}, {"maxTolerance", 1},
{"convergence", false}, {"convergence", false},
}); });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv); auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt); auto scoret = clf.score(raw.Xt, raw.yt);
INFO("BoostAODE order: " << order); INFO("BoostAODE order: " << order);
@ -134,7 +134,7 @@ TEST_CASE("Oddities", "[BoostAODE]")
for (const auto& hyper : bad_hyper_fit.items()) { for (const auto& hyper : bad_hyper_fit.items()) {
INFO("BoostAODE hyper: " << hyper.value().dump()); INFO("BoostAODE hyper: " << hyper.value().dump());
clf.setHyperparameters(hyper.value()); clf.setHyperparameters(hyper.value());
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument); REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
} }
} }
@ -149,7 +149,7 @@ TEST_CASE("Bisection Best", "[BoostAODE]")
{"block_update", false}, {"block_update", false},
{"convergence_best", false}, {"convergence_best", false},
}); });
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 210); REQUIRE(clf.getNumberOfNodes() == 210);
REQUIRE(clf.getNumberOfEdges() == 378); REQUIRE(clf.getNumberOfEdges() == 378);
REQUIRE(clf.getNotes().size() == 1); REQUIRE(clf.getNotes().size() == 1);
@ -170,13 +170,13 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
{"convergence_best", true}, {"convergence_best", true},
}; };
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_best = clf.score(raw.X_test, raw.y_test); auto score_best = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon)); REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
// Now we will set the hyperparameter to use the last accuracy // Now we will set the hyperparameter to use the last accuracy
hyperparameters["convergence_best"] = false; hyperparameters["convergence_best"] = false;
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_last = clf.score(raw.X_test, raw.y_test); auto score_last = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
} }
@ -191,7 +191,7 @@ TEST_CASE("Block Update", "[BoostAODE]")
{"maxTolerance", 3}, {"maxTolerance", 3},
{"convergence", true}, {"convergence", true},
}); });
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 868); REQUIRE(clf.getNumberOfNodes() == 868);
REQUIRE(clf.getNumberOfEdges() == 1724); REQUIRE(clf.getNumberOfEdges() == 1724);
REQUIRE(clf.getNotes().size() == 3); REQUIRE(clf.getNotes().size() == 3);

View File

@ -16,10 +16,10 @@
#include "TestUtils.h" #include "TestUtils.h"
std::map<std::string, std::string> modules = { std::map<std::string, std::string> modules = {
{ "mdlp", "1.1.2" }, { "mdlp", "2.0.0" },
{ "Folding", "1.1.0" }, { "Folding", "1.1.0" },
{ "json", "3.11" }, { "json", "3.11" },
{ "ArffFiles", "1.0.0" } { "ArffFiles", "1.1.0" }
}; };
TEST_CASE("MDLP", "[Modules]") TEST_CASE("MDLP", "[Modules]")

View File

@ -14,6 +14,7 @@
#include <ArffFiles.hpp> #include <ArffFiles.hpp>
#include <CPPFImdlp.h> #include <CPPFImdlp.h>
#include <folding.hpp> #include <folding.hpp>
#include <bayesnet/network/Network.h>
class RawDatasets { class RawDatasets {
@ -32,6 +33,7 @@ public:
bool discretize; bool discretize;
int num_samples = 0; int num_samples = 0;
bool shuffle = false; bool shuffle = false;
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
private: private:
std::string to_string() std::string to_string()
{ {

@ -1 +1 @@
Subproject commit 40ac38011a2445e00df8a18048c67abaff16fa59 Subproject commit a5316928d408266aa425f64131ab0f592b010a8d