diff --git a/.vscode/launch.json b/.vscode/launch.json index 66b5438..40fbf2f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,11 +14,11 @@ "type": "lldb", "request": "launch", "name": "test", - "program": "${workspaceFolder}/build_debug/tests/TestBayesNet", + "program": "${workspaceFolder}/build_Debug/tests/TestBayesNet", "args": [ - "[Node]" + "[Network]" ], - "cwd": "${workspaceFolder}/build_debug/tests" + "cwd": "${workspaceFolder}/build_Debug/tests" }, { "name": "(gdb) Launch", diff --git a/README.md b/README.md index bdc77a3..7cad08d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-97,0%25-green)](html/index.html) +[![Coverage Badge](https://img.shields.io/badge/Coverage-97,1%25-green)](html/index.html) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 8aee16d..0458d3a 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -224,14 +224,34 @@ namespace bayesnet { if (!fitted) { throw std::logic_error("You must call fit() before calling predict()"); } + // Ensure the sample size is equal to the number of features + if (samples.size(0) != features.size() - 1) { + throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); + } torch::Tensor result; + std::vector threads; + std::mutex mtx; + auto& semaphore = CountingSemaphore::getInstance(); result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); - for (int i = 0; i < samples.size(1); ++i) { - const torch::Tensor sample = samples.index({ "...", i }); + auto worker = [&](const torch::Tensor& sample, int i) { + std::string threadName = "PredictWorker-" + std::to_string(i); + pthread_setname_np(pthread_self(), threadName.c_str()); + semaphore.acquire(); auto psample = predict_sample(sample); auto temp = torch::tensor(psample, torch::kFloat64); - // result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); - result.index_put_({ i, "..." }, temp); + { + std::lock_guard lock(mtx); + result.index_put_({ i, "..." }, temp); + } + semaphore.release(); + }; + for (int i = 0; i < samples.size(1); ++i) { + const torch::Tensor sample = samples.index({ "...", i }); + threads.emplace_back(worker, sample, i); + } + for (auto& thread : threads) { + thread.join(); } if (proba) return result; @@ -256,18 +276,36 @@ namespace bayesnet { if (!fitted) { throw std::logic_error("You must call fit() before calling predict()"); } - std::vector predictions; + // Ensure the sample size is equal to the number of features + if (tsamples.size() != features.size() - 1) { + throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); + } + std::vector predictions(tsamples[0].size(), 0); std::vector sample; + std::vector threads; + std::mutex mtx; + auto& semaphore = CountingSemaphore::getInstance(); + auto worker = [&](const std::vector& sample, const int row, std::vector& predictions) { + semaphore.acquire(); + auto classProbabilities = predict_sample(sample); + auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); + int predictedClass = distance(classProbabilities.begin(), maxElem); + { + std::lock_guard lock(mtx); + predictions[row] = predictedClass; + } + semaphore.release(); + }; for (int row = 0; row < tsamples[0].size(); ++row) { sample.clear(); for (int col = 0; col < tsamples.size(); ++col) { sample.push_back(tsamples[col][row]); } - std::vector classProbabilities = predict_sample(sample); - // Find the class with the maximum posterior probability - auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); - int predictedClass = distance(classProbabilities.begin(), maxElem); - predictions.push_back(predictedClass); + threads.emplace_back(worker, sample, row, std::ref(predictions)); + } + for (auto& thread : threads) { + thread.join(); } return predictions; } @@ -278,6 +316,11 @@ namespace bayesnet { if (!fitted) { throw std::logic_error("You must call fit() before calling predict_proba()"); } + // Ensure the sample size is equal to the number of features + if (tsamples.size() != features.size() - 1) { + throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); + } std::vector> predictions; std::vector sample; for (int row = 0; row < tsamples[0].size(); ++row) { @@ -303,11 +346,6 @@ namespace bayesnet { // Return 1xn std::vector of probabilities std::vector Network::predict_sample(const std::vector& sample) { - // Ensure the sample size is equal to the number of features - if (sample.size() != features.size() - 1) { - throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) + - ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); - } std::map evidence; for (int i = 0; i < sample.size(); ++i) { evidence[features[i]] = sample[i]; @@ -317,56 +355,23 @@ namespace bayesnet { // Return 1xn std::vector of probabilities std::vector Network::predict_sample(const torch::Tensor& sample) { - // Ensure the sample size is equal to the number of features - if (sample.size(0) != features.size() - 1) { - throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) + - ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); - } std::map evidence; for (int i = 0; i < sample.size(0); ++i) { evidence[features[i]] = sample[i].item(); } return exactInference(evidence); } - double Network::computeFactor(std::map& completeEvidence) - { - double result = 1.0; - for (auto& node : getNodes()) { - result *= node.second->getFactorValue(completeEvidence); - } - return result; - } std::vector Network::exactInference(std::map& evidence) { - - - //Implementar una cache para acelerar la inferencia. - // Cambiar la estrategia de crear hilos en la inferencia (por nodos como en fit?) - - - std::vector result(classNumStates, 0.0); - std::vector threads; - std::mutex mtx; - auto& semaphore = CountingSemaphore::getInstance(); - auto worker = [&](int i) { - semaphore.acquire(); - std::string threadName = "InferenceWorker-" + std::to_string(i); - pthread_setname_np(pthread_self(), threadName.c_str()); - auto completeEvidence = std::map(evidence); - completeEvidence[getClassName()] = i; - double factor = computeFactor(completeEvidence); - { - std::lock_guard lock(mtx); - result[i] = factor; - } - semaphore.release(); - }; + auto completeEvidence = std::map(evidence); for (int i = 0; i < classNumStates; ++i) { - threads.emplace_back(worker, i); - } - for (auto& thread : threads) { - thread.join(); + completeEvidence[getClassName()] = i; + double partial = 1.0; + for (auto& node : getNodes()) { + partial *= node.second->getFactorValue(completeEvidence); + } + result[i] = partial; } // Normalize result double sum = std::accumulate(result.begin(), result.end(), 0.0); diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 1aea190..0210877 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -21,11 +21,9 @@ namespace bayesnet { class Network { public: Network(); - explicit Network(float); explicit Network(const Network&); ~Network() = default; torch::Tensor& getSamples(); - float getMaxThreads() const; void addNode(const std::string&); void addEdge(const std::string&, const std::string&); std::map>& getNodes(); @@ -64,7 +62,6 @@ namespace bayesnet { std::vector predict_sample(const std::vector&); std::vector predict_sample(const torch::Tensor&); std::vector exactInference(std::map&); - double computeFactor(std::map&); void completeFit(const std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); void setStates(const std::map>&); diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index 3638a19..0663ca2 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -149,6 +149,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test show") { + INFO("Test show"); net.addNode("A"); net.addNode("B"); net.addNode("C"); @@ -162,6 +163,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test topological_sort") { + INFO("Test topological sort"); net.addNode("A"); net.addNode("B"); net.addNode("C"); @@ -175,6 +177,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test graph") { + INFO("Test graph"); net.addNode("A"); net.addNode("B"); net.addNode("C"); @@ -192,6 +195,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test predict") { + INFO("Test predict"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; @@ -201,6 +205,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test predict_proba") { + INFO("Test predict_proba"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; @@ -222,6 +227,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test score") { + INFO("Test score"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto score = net.score(raw.Xv, raw.yv); @@ -229,6 +235,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Copy constructor") { + INFO("Test copy constructor"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto net2 = bayesnet::Network(net); @@ -252,6 +259,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test oddities") { + INFO("Test oddities"); buildModel(net, raw.features, raw.className); // predict without fitting std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; @@ -270,10 +278,10 @@ TEST_CASE("Test Bayesian Network", "[Network]") netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} }; auto test_tensor2 = bayesnet::vectorToTensor(test2, false); - REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error); - REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)"); - REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error); - REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)"); + REQUIRE_THROWS_AS(netx.predict(test2), std::invalid_argument); + REQUIRE_THROWS_WITH(netx.predict(test2), "(V) Sample size (3) does not match the number of features (4)"); + REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::invalid_argument); + REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "(T) Sample size (3) does not match the number of features (4)"); // fit with wrong data // Weights auto net2 = bayesnet::Network(); @@ -341,15 +349,6 @@ TEST_CASE("Cicle in Network", "[Network]") REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument); REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph."); } -TEST_CASE("Test max threads constructor", "[Network]") -{ - auto net = bayesnet::Network(); - REQUIRE(net.getMaxThreads() == 0.95f); - auto net2 = bayesnet::Network(4); - REQUIRE(net2.getMaxThreads() == 4); - auto net3 = bayesnet::Network(1.75); - REQUIRE(net3.getMaxThreads() == 1.75); -} TEST_CASE("Edges troubles", "[Network]") { auto net = bayesnet::Network();