diff --git a/Makefile b/Makefile
index 6d8a7aa..cb02f3a 100644
--- a/Makefile
+++ b/Makefile
@@ -133,8 +133,13 @@ coverage: ## Run tests and generate coverage report (build/index.html)
 	@echo ">>> Done";
 
 viewcoverage: ## View the html coverage report
-	@which $(genhtml) || (echo ">>> Please install lcov (genhtml not found)"; exit 1)
-	@$(genhtml) $(f_debug)/tests/coverage.info --demangle-cpp --output-directory html --title "BayesNet Coverage Report" -s -k -f --legend >/dev/null 2>&1;
+	@which $(genhtml) >/dev/null || (echo ">>> Please install lcov (genhtml not found)"; exit 1)
+	@if [ ! -d $(docsrcdir)/coverage ]; then mkdir -p $(docsrcdir)/coverage; fi
+	@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
+		echo ">>> No coverage.info file found. Run make coverage first!"; \
+		exit 1; \
+	fi
+	@$(genhtml) $(f_debug)/tests/coverage.info --demangle-cpp --output-directory $(docsrcdir)/coverage --title "BayesNet Coverage Report" -s -k -f --legend >/dev/null 2>&1;
 	@xdg-open html/index.html || open html/index.html 2>/dev/null
 	@echo ">>> Done";
 
@@ -151,6 +156,7 @@ updatebadge: ## Update the coverage badge in README.md
 doc: ## Generate documentation
 	@echo ">>> Generating documentation..."
 	@cmake --build $(f_release) -t doxygen
+	@cp -rp diagrams $(docsrcdir)
 	@echo ">>> Done";
 
 docdir = ""
diff --git a/README.md b/README.md
index 67d8213..1b47306 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ make sample fname=tests/data/glass.arff
 
 ### [Manual](https://rmontanana.github.io/bayesnet/)
 
-### [Coverage report](docs/coverage.pdf)
+### [Coverage report](https://rmontanana.github.io/bayesnet/coverage/index.html)
 
 ## Diagrams
 
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index 95acb53..afb2526 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -950,6 +950,7 @@ WARN_LOGFILE =
 # Note: If this tag is empty the current directory is searched.
 
 INPUT = /Users/rmontanana/Code/BayesNet/bayesnet
+INPUT += /Users/rmontanana/Code/BayesNet/README.md
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -1174,7 +1175,7 @@ FILTER_SOURCE_PATTERNS =
 # (index.html). This can be useful if you have a project on for instance GitHub
 # and want to reuse the introduction page also for the doxygen output.
 
-USE_MDFILE_AS_MAINPAGE =
+USE_MDFILE_AS_MAINPAGE = README.md
 
 # The Fortran standard specifies that for fixed formatted Fortran code all
 # characters from position 72 are to be considered as comment. A common
@@ -1424,7 +1425,8 @@ HTML_EXTRA_STYLESHEET =
 # files will be copied as-is; there are no commands or markers available.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_FILES =
+HTML_EXTRA_FILES = /Users/rmontanana/Code/BayesNet/diagrams
+HTML_EXTRA_FILES += /Users/rmontanana/Code/BayesNet/logo.png
 
 # The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
 # should be rendered with a dark or light theme.
diff --git a/docs/coverage.pdf b/docs/coverage.pdf
deleted file mode 100644
index 3d4b011..0000000
Binary files a/docs/coverage.pdf and /dev/null differ
diff --git a/html/amber.png b/html/amber.png
deleted file mode 100644
index 2cab170..0000000
Binary files a/html/amber.png and /dev/null differ
diff --git a/html/bayesnet/BaseClassifier.h.func-c.html b/html/bayesnet/BaseClassifier.h.func-c.html
deleted file mode 100644
index 7dd3cc5..0000000
--- a/html/bayesnet/BaseClassifier.h.func-c.html
+++ /dev/null
@@ -1,90 +0,0 @@
- -LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::BaseClassifier::~BaseClassifier() | - -1680 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::BaseClassifier::~BaseClassifier() | - -1680 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #pragma once - 8 : #include <vector> - 9 : #include <torch/torch.h> - 10 : #include <nlohmann/json.hpp> - 11 : namespace bayesnet { - 12 : enum status_t { NORMAL, WARNING, ERROR }; - 13 : class BaseClassifier { - 14 : public: - 15 : // X is nxm std::vector, y is nx1 std::vector - 16 : virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; - 17 : // X is nxm tensor, y is nx1 tensor - 18 : virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; - 19 : virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; - 20 : virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0; - 21 1680 : virtual ~BaseClassifier() = default; - 22 : torch::Tensor virtual predict(torch::Tensor& X) = 0; - 23 : std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0; - 24 : torch::Tensor virtual predict_proba(torch::Tensor& X) = 0; - 25 : std::vector<std::vector<double>> virtual predict_proba(std::vector<std::vector<int >>& X) = 0; - 26 : status_t virtual getStatus() const = 0; - 27 : float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0; - 28 : float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; - 29 : int virtual getNumberOfNodes()const = 0; - 30 : int virtual getNumberOfEdges()const = 0; - 31 : int virtual getNumberOfStates() const = 0; - 32 : int virtual getClassNumStates() const = 0; - 33 : std::vector<std::string> virtual show() const = 0; - 34 : std::vector<std::string> virtual graph(const std::string& title = "") const = 0; - 35 : virtual std::string getVersion() = 0; - 36 : std::vector<std::string> virtual topological_order() = 0; - 37 : std::vector<std::string> virtual getNotes() const = 0; - 38 : std::string virtual dump_cpt()const = 0; - 39 : virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; - 40 : std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; } - 41 : protected: - 42 : virtual void trainModel(const torch::Tensor& weights) = 0; - 43 : std::vector<std::string> validHyperparameters; - 44 : }; - 45 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include <sstream> - 8 : #include "bayesnet/utils/bayesnetUtils.h" - 9 : #include "Classifier.h" - 10 : - 11 : namespace bayesnet { - 12 2240 : Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} - 13 : const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; - 14 1760 : Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) - 15 : { - 16 1760 : this->features = features; - 17 1760 : this->className = className; - 18 1760 : this->states = states; - 19 1760 : m = dataset.size(1); - 20 1760 : n = features.size(); - 21 1760 : checkFitParameters(); - 22 1728 : auto n_classes = states.at(className).size(); - 23 1728 : metrics = Metrics(dataset, features, className, n_classes); - 24 1728 : model.initialize(); - 25 1728 : buildModel(weights); - 26 1728 : trainModel(weights); - 27 1712 : fitted = true; - 28 1712 : return *this; - 29 : } - 30 340 : void Classifier::buildDataset(torch::Tensor& ytmp) - 31 : { - 32 : try { - 33 340 : auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1); - 34 1052 : dataset = torch::cat({ dataset, yresized }, 0); - 35 340 : } - 36 16 : catch (const std::exception& e) { - 37 16 : std::stringstream oss; - 38 16 : oss << "* Error in X and y dimensions *\n"; - 39 16 : oss << "X dimensions: " << dataset.sizes() << "\n"; - 40 16 : oss << "y dimensions: " << ytmp.sizes(); - 41 16 : throw std::runtime_error(oss.str()); - 42 32 : } - 43 680 : } - 44 1576 : void Classifier::trainModel(const torch::Tensor& weights) - 45 : { - 46 1576 : model.fit(dataset, weights, features, className, states); - 47 1576 : } - 48 : // X is nxm where n is the number of features and m the number of samples - 49 128 : Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) - 50 : { - 51 128 : dataset = X; - 52 128 : buildDataset(y); - 53 120 : const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - 54 208 : return build(features, className, states, weights); - 55 120 : } - 56 : // X is nxm where n is the number of features and m the number of samples - 57 136 : Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) - 58 : { - 59 136 : dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32); - 60 976 : for (int i = 0; i < X.size(); ++i) { - 61 3360 : dataset.index_put_({ i, "..." 
}, torch::tensor(X[i], torch::kInt32)); - 62 : } - 63 136 : auto ytmp = torch::tensor(y, torch::kInt32); - 64 136 : buildDataset(ytmp); - 65 128 : const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - 66 240 : return build(features, className, states, weights); - 67 992 : } - 68 852 : Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) - 69 : { - 70 852 : this->dataset = dataset; - 71 852 : const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - 72 1704 : return build(features, className, states, weights); - 73 852 : } - 74 660 : Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) - 75 : { - 76 660 : this->dataset = dataset; - 77 660 : return build(features, className, states, weights); - 78 : } - 79 1760 : void Classifier::checkFitParameters() - 80 : { - 81 1760 : if (torch::is_floating_point(dataset)) { - 82 8 : throw std::invalid_argument("dataset (X, y) must be of type Integer"); - 83 : } - 84 1752 : if (dataset.size(0) - 1 != features.size()) { - 85 8 : throw std::invalid_argument("Classifier: X " + std::to_string(dataset.size(0) - 1) + " and features " + std::to_string(features.size()) + " must have the same number of features"); - 86 : } - 87 1744 : if (states.find(className) == states.end()) { - 88 8 : throw std::invalid_argument("class name not found in states"); - 89 : } - 90 32996 : for (auto feature : features) { - 91 31268 : if (states.find(feature) == states.end()) { - 92 8 : throw std::invalid_argument("feature [" + feature + "] not found in states"); - 93 : } - 94 31268 : } - 95 1728 : } - 96 1844 : torch::Tensor Classifier::predict(torch::Tensor& X) - 97 : { - 98 1844 : if (!fitted) { - 99 16 : throw std::logic_error(CLASSIFIER_NOT_FITTED); - 100 : } - 101 1828 : return model.predict(X); - 102 : } - 103 16 : std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X) - 104 : { - 105 16 : if (!fitted) { - 106 8 : throw std::logic_error(CLASSIFIER_NOT_FITTED); - 107 : } - 108 8 : auto m_ = X[0].size(); - 109 8 : auto n_ = X.size(); - 110 8 : std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0)); - 111 40 : for (auto i = 0; i < n_; i++) { - 112 64 : Xd[i] = std::vector<int>(X[i].begin(), X[i].end()); - 113 : } - 114 8 : auto yp = model.predict(Xd); - 115 16 : return yp; - 116 8 : } - 117 1484 : torch::Tensor Classifier::predict_proba(torch::Tensor& X) - 118 : { - 119 1484 : if (!fitted) { - 120 8 : throw std::logic_error(CLASSIFIER_NOT_FITTED); - 121 : } - 122 1476 : return model.predict_proba(X); - 123 : } - 124 548 : std::vector<std::vector<double>> Classifier::predict_proba(std::vector<std::vector<int>>& X) - 125 : { - 126 548 : if (!fitted) { - 127 8 : throw std::logic_error(CLASSIFIER_NOT_FITTED); - 128 : } - 129 540 : auto m_ = X[0].size(); - 130 540 : auto n_ = X.size(); - 131 540 : std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0)); - 132 : // Convert to nxm vector - 133 5040 : for (auto i = 0; i < n_; i++) { - 134 9000 : Xd[i] = std::vector<int>(X[i].begin(), X[i].end()); - 135 : } - 136 540 : auto yp = model.predict_proba(Xd); - 137 1080 : return yp; - 138 540 : } - 139 112 : float Classifier::score(torch::Tensor& X, torch::Tensor& y) - 140 : { - 141 112 : torch::Tensor 
y_pred = predict(X); - 142 208 : return (y_pred == y).sum().item<float>() / y.size(0); - 143 104 : } - 144 16 : float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y) - 145 : { - 146 16 : if (!fitted) { - 147 8 : throw std::logic_error(CLASSIFIER_NOT_FITTED); - 148 : } - 149 8 : return model.score(X, y); - 150 : } - 151 24 : std::vector<std::string> Classifier::show() const - 152 : { - 153 24 : return model.show(); - 154 : } - 155 1576 : void Classifier::addNodes() - 156 : { - 157 : // Add all nodes to the network - 158 30872 : for (const auto& feature : features) { - 159 29296 : model.addNode(feature); - 160 : } - 161 1576 : model.addNode(className); - 162 1576 : } - 163 332 : int Classifier::getNumberOfNodes() const - 164 : { - 165 : // Features does not include class - 166 332 : return fitted ? model.getFeatures().size() : 0; - 167 : } - 168 332 : int Classifier::getNumberOfEdges() const - 169 : { - 170 332 : return fitted ? model.getNumEdges() : 0; - 171 : } - 172 24 : int Classifier::getNumberOfStates() const - 173 : { - 174 24 : return fitted ? model.getStates() : 0; - 175 : } - 176 348 : int Classifier::getClassNumStates() const - 177 : { - 178 348 : return fitted ? model.getClassNumStates() : 0; - 179 : } - 180 4 : std::vector<std::string> Classifier::topological_order() - 181 : { - 182 4 : return model.topological_sort(); - 183 : } - 184 4 : std::string Classifier::dump_cpt() const - 185 : { - 186 4 : return model.dump_cpt(); - 187 : } - 188 92 : void Classifier::setHyperparameters(const nlohmann::json& hyperparameters) - 189 : { - 190 92 : if (!hyperparameters.empty()) { - 191 8 : throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump()); - 192 : } - 193 84 : } - 194 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::Classifier::getVersion[abi:cxx11]() | - -32 | - - -
bayesnet::Classifier::getNotes[abi:cxx11]() const | - -80 | - - -
bayesnet::Classifier::getStatus() const | - -128 | - - -
bayesnet::Classifier::~Classifier() | - -1680 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::Classifier::getNotes[abi:cxx11]() const | - -80 | - - -
bayesnet::Classifier::getStatus() const | - -128 | - - -
bayesnet::Classifier::getVersion[abi:cxx11]() | - -32 | - - -
bayesnet::Classifier::~Classifier() | - -1680 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef CLASSIFIER_H - 8 : #define CLASSIFIER_H - 9 : #include <torch/torch.h> - 10 : #include "bayesnet/utils/BayesMetrics.h" - 11 : #include "bayesnet/network/Network.h" - 12 : #include "bayesnet/BaseClassifier.h" - 13 : - 14 : namespace bayesnet { - 15 : class Classifier : public BaseClassifier { - 16 : public: - 17 : Classifier(Network model); - 18 1680 : virtual ~Classifier() = default; - 19 : Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; - 20 : Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; - 21 : Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; - 22 : Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override; - 23 : void addNodes(); - 24 : int getNumberOfNodes() const override; - 25 : int getNumberOfEdges() const override; - 26 : int getNumberOfStates() const override; - 27 : int getClassNumStates() const override; - 28 : torch::Tensor predict(torch::Tensor& X) override; - 29 : std::vector<int> predict(std::vector<std::vector<int>>& X) override; - 30 : torch::Tensor predict_proba(torch::Tensor& X) override; - 31 : std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override; - 32 128 : status_t getStatus() const override { return status; } - 33 96 : std::string getVersion() override { return { project_version.begin(), project_version.end() }; }; - 34 : float score(torch::Tensor& X, torch::Tensor& y) override; - 35 : float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override; - 36 : std::vector<std::string> show() const override; - 37 : std::vector<std::string> topological_order() override; - 38 80 : std::vector<std::string> getNotes() const override { return notes; } - 39 : std::string dump_cpt() const override; - 40 : void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters - 41 : protected: - 42 : bool fitted; - 43 : unsigned int m, n; // m: number of samples, n: number of features - 44 : Network model; - 45 : Metrics metrics; - 46 : std::vector<std::string> features; - 47 : std::string className; - 48 : std::map<std::string, std::vector<int>> states; - 49 : torch::Tensor dataset; // (n+1)xm tensor - 50 : status_t status = NORMAL; - 51 : std::vector<std::string> notes; // Used to store messages occurred during the fit process - 52 : void checkFitParameters(); - 53 : virtual void buildModel(const torch::Tensor& weights) = 0; - 54 : void trainModel(const torch::Tensor& weights) override; - 55 : void buildDataset(torch::Tensor& y); - 56 : private: - 57 : Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& 
weights); - 58 : }; - 59 : } - 60 : #endif - 61 : - 62 : - 63 : - 64 : - 65 : -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include "KDB.h" - 8 : - 9 : namespace bayesnet { - 10 148 : KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) - 11 : { - 12 444 : validHyperparameters = { "k", "theta" }; - 13 : - 14 444 : } - 15 12 : void KDB::setHyperparameters(const nlohmann::json& hyperparameters_) - 16 : { - 17 12 : auto hyperparameters = hyperparameters_; - 18 12 : if (hyperparameters.contains("k")) { - 19 4 : k = hyperparameters["k"]; - 20 4 : hyperparameters.erase("k"); - 21 : } - 22 12 : if (hyperparameters.contains("theta")) { - 23 4 : theta = hyperparameters["theta"]; - 24 4 : hyperparameters.erase("theta"); - 25 : } - 26 12 : Classifier::setHyperparameters(hyperparameters); - 27 12 : } - 28 52 : void KDB::buildModel(const torch::Tensor& weights) - 29 : { - 30 : /* - 31 : 1. For each feature Xi, compute mutual information, I(X;C), - 32 : where C is the class. - 33 : 2. Compute class conditional mutual information I(Xi;XjIC), f or each - 34 : pair of features Xi and Xj, where i#j. - 35 : 3. Let the used variable list, S, be empty. - 36 : 4. Let the DAG network being constructed, BN, begin with a single - 37 : class node, C. - 38 : 5. Repeat until S includes all domain features - 39 : 5.1. Select feature Xmax which is not in S and has the largest value - 40 : I(Xmax;C). - 41 : 5.2. Add a node to BN representing Xmax. - 42 : 5.3. Add an arc from C to Xmax in BN. - 43 : 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with - 44 : the highest value for I(Xmax;X,jC). - 45 : 5.5. Add Xmax to S. - 46 : Compute the conditional probabilility infered by the structure of BN by - 47 : using counts from DB, and output BN. - 48 : */ - 49 : // 1. For each feature Xi, compute mutual information, I(X;C), - 50 : // where C is the class. - 51 52 : addNodes(); - 52 156 : const torch::Tensor& y = dataset.index({ -1, "..." }); - 53 52 : std::vector<double> mi; - 54 396 : for (auto i = 0; i < features.size(); i++) { - 55 1032 : torch::Tensor firstFeature = dataset.index({ i, "..." }); - 56 344 : mi.push_back(metrics.mutualInformation(firstFeature, y, weights)); - 57 344 : } - 58 : // 2. Compute class conditional mutual information I(Xi;XjIC), f or each - 59 52 : auto conditionalEdgeWeights = metrics.conditionalEdge(weights); - 60 : // 3. Let the used variable list, S, be empty. - 61 52 : std::vector<int> S; - 62 : // 4. Let the DAG network being constructed, BN, begin with a single - 63 : // class node, C. - 64 : // 5. Repeat until S includes all domain features - 65 : // 5.1. Select feature Xmax which is not in S and has the largest value - 66 : // I(Xmax;C). - 67 52 : auto order = argsort(mi); - 68 396 : for (auto idx : order) { - 69 : // 5.2. Add a node to BN representing Xmax. - 70 : // 5.3. Add an arc from C to Xmax in BN. - 71 344 : model.addEdge(className, features[idx]); - 72 : // 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with - 73 : // the highest value for I(Xmax;X,jC). - 74 344 : add_m_edges(idx, S, conditionalEdgeWeights); - 75 : // 5.5. Add Xmax to S. 
- 76 344 : S.push_back(idx); - 77 : } - 78 448 : } - 79 344 : void KDB::add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights) - 80 : { - 81 344 : auto n_edges = std::min(k, static_cast<int>(S.size())); - 82 344 : auto cond_w = clone(weights); - 83 344 : bool exit_cond = k == 0; - 84 344 : int num = 0; - 85 1004 : while (!exit_cond) { - 86 2640 : auto max_minfo = argmax(cond_w.index({ idx, "..." })).item<int>(); - 87 660 : auto belongs = find(S.begin(), S.end(), max_minfo) != S.end(); - 88 1764 : if (belongs && cond_w.index({ idx, max_minfo }).item<float>() > theta) { - 89 : try { - 90 320 : model.addEdge(features[max_minfo], features[idx]); - 91 320 : num++; - 92 : } - 93 0 : catch (const std::invalid_argument& e) { - 94 : // Loops are not allowed - 95 0 : } - 96 : } - 97 2640 : cond_w.index_put_({ idx, max_minfo }, -1); - 98 1980 : auto candidates_mask = cond_w.index({ idx, "..." }).gt(theta); - 99 660 : auto candidates = candidates_mask.nonzero(); - 100 660 : exit_cond = num == n_edges || candidates.size(0) == 0; - 101 660 : } - 102 2692 : } - 103 8 : std::vector<std::string> KDB::graph(const std::string& title) const - 104 : { - 105 8 : std::string header{ title }; - 106 8 : if (title == "KDB") { - 107 8 : header += " (k=" + std::to_string(k) + ", theta=" + std::to_string(theta) + ")"; - 108 : } - 109 16 : return model.graph(header); - 110 8 : } - 111 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::KDB::~KDB() | - -44 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::KDB::~KDB() | - -44 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef KDB_H - 8 : #define KDB_H - 9 : #include <torch/torch.h> - 10 : #include "bayesnet/utils/bayesnetUtils.h" - 11 : #include "Classifier.h" - 12 : namespace bayesnet { - 13 : class KDB : public Classifier { - 14 : private: - 15 : int k; - 16 : float theta; - 17 : void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights); - 18 : protected: - 19 : void buildModel(const torch::Tensor& weights) override; - 20 : public: - 21 : explicit KDB(int k, float theta = 0.03); - 22 44 : virtual ~KDB() = default; - 23 : void setHyperparameters(const nlohmann::json& hyperparameters_) override; - 24 : std::vector<std::string> graph(const std::string& name = "KDB") const override; - 25 : }; - 26 : } - 27 : #endif -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include "KDBLd.h" - 8 : - 9 : namespace bayesnet { - 10 68 : KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} - 11 20 : KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) - 12 : { - 13 20 : checkInput(X_, y_); - 14 20 : features = features_; - 15 20 : className = className_; - 16 20 : Xf = X_; - 17 20 : y = y_; - 18 : // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y - 19 20 : states = fit_local_discretization(y); - 20 : // We have discretized the input data - 21 : // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network - 22 20 : KDB::fit(dataset, features, className, states); - 23 20 : states = localDiscretizationProposal(states, model); - 24 20 : return *this; - 25 : } - 26 16 : torch::Tensor KDBLd::predict(torch::Tensor& X) - 27 : { - 28 16 : auto Xt = prepareX(X); - 29 32 : return KDB::predict(Xt); - 30 16 : } - 31 4 : std::vector<std::string> KDBLd::graph(const std::string& name) const - 32 : { - 33 4 : return KDB::graph(name); - 34 : } - 35 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::KDBLd::~KDBLd() | - -20 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::KDBLd::~KDBLd() | - -20 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef KDBLD_H - 8 : #define KDBLD_H - 9 : #include "Proposal.h" - 10 : #include "KDB.h" - 11 : - 12 : namespace bayesnet { - 13 : class KDBLd : public KDB, public Proposal { - 14 : private: - 15 : public: - 16 : explicit KDBLd(int k); - 17 20 : virtual ~KDBLd() = default; - 18 : KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; - 19 : std::vector<std::string> graph(const std::string& name = "KDB") const override; - 20 : torch::Tensor predict(torch::Tensor& X) override; - 21 : static inline std::string version() { return "0.0.1"; }; - 22 : }; - 23 : } - 24 : #endif // !KDBLD_H -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include <ArffFiles.h> - 8 : #include "Proposal.h" - 9 : - 10 : namespace bayesnet { - 11 424 : Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {} - 12 200 : Proposal::~Proposal() - 13 : { - 14 1896 : for (auto& [key, value] : discretizers) { - 15 1696 : delete value; - 16 : } - 17 200 : } - 18 228 : void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y) - 19 : { - 20 228 : if (!torch::is_floating_point(X)) { - 21 0 : throw std::invalid_argument("X must be a floating point tensor"); - 22 : } - 23 228 : if (torch::is_floating_point(y)) { - 24 0 : throw std::invalid_argument("y must be an integer tensor"); - 25 : } - 26 228 : } - 27 212 : map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model) - 28 : { - 29 : // order of local discretization is important. no good 0, 1, 2... - 30 : // although we rediscretize features after the local discretization of every feature - 31 212 : auto order = model.topological_sort(); - 32 212 : auto& nodes = model.getNodes(); - 33 212 : map<std::string, std::vector<int>> states = oldStates; - 34 212 : std::vector<int> indicesToReDiscretize; - 35 212 : bool upgrade = false; // Flag to check if we need to upgrade the model - 36 1776 : for (auto feature : order) { - 37 1564 : auto nodeParents = nodes[feature]->getParents(); - 38 1564 : if (nodeParents.size() < 2) continue; // Only has class as parent - 39 1324 : upgrade = true; - 40 1324 : int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin(); - 41 1324 : indicesToReDiscretize.push_back(index); // We need to re-discretize this feature - 42 1324 : std::vector<std::string> parents; - 43 4020 : transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); }); - 44 : // Remove class as parent as it will be added later - 45 1324 : parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end()); - 46 : // Get the indices of the parents - 47 1324 : std::vector<int> indices; - 48 1324 : indices.push_back(-1); // Add class index - 49 2696 : transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); }); - 50 : // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. 
discretizer.fit(X[index], X[indices] + y) - 51 1324 : std::vector<std::string> yJoinParents(Xf.size(1)); - 52 4020 : for (auto idx : indices) { - 53 958640 : for (int i = 0; i < Xf.size(1); ++i) { - 54 2867832 : yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>()); - 55 : } - 56 : } - 57 1324 : auto arff = ArffFiles(); - 58 1324 : auto yxv = arff.factorize(yJoinParents); - 59 2648 : auto xvf_ptr = Xf.index({ index }).data_ptr<float>(); - 60 1324 : auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1)); - 61 1324 : discretizers[feature]->fit(xvf, yxv); - 62 1804 : } - 63 212 : if (upgrade) { - 64 : // Discretize again X (only the affected indices) with the new fitted discretizers - 65 1536 : for (auto index : indicesToReDiscretize) { - 66 2648 : auto Xt_ptr = Xf.index({ index }).data_ptr<float>(); - 67 1324 : auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); - 68 5296 : pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt))); - 69 1324 : auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1); - 70 1324 : iota(xStates.begin(), xStates.end(), 0); - 71 : //Update new states of the feature/node - 72 1324 : states[pFeatures[index]] = xStates; - 73 1324 : } - 74 212 : const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); - 75 212 : model.fit(pDataset, weights, pFeatures, pClassName, states); - 76 212 : } - 77 424 : return states; - 78 960128 : } - 79 232 : map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y) - 80 : { - 81 : // Discretize the continuous input data and build pDataset (Classifier::dataset) - 82 232 : int m = Xf.size(1); - 83 232 : int n = Xf.size(0); - 84 232 : map<std::string, std::vector<int>> states; - 85 232 : pDataset = torch::zeros({ n + 1, m }, torch::kInt32); - 86 232 : auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); - 87 : // discretize input data by feature(row) - 88 1944 : for (auto i = 0; i < pFeatures.size(); ++i) { - 89 1712 : auto* discretizer = new mdlp::CPPFImdlp(); - 90 3424 : auto Xt_ptr = Xf.index({ i }).data_ptr<float>(); - 91 1712 : auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); - 92 1712 : discretizer->fit(Xt, yv); - 93 6848 : pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt))); - 94 1712 : auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1); - 95 1712 : iota(xStates.begin(), xStates.end(), 0); - 96 1712 : states[pFeatures[i]] = xStates; - 97 1712 : discretizers[pFeatures[i]] = discretizer; - 98 1712 : } - 99 232 : int n_classes = torch::max(y).item<int>() + 1; - 100 232 : auto yStates = std::vector<int>(n_classes); - 101 232 : iota(yStates.begin(), yStates.end(), 0); - 102 232 : states[pClassName] = yStates; - 103 696 : pDataset.index_put_({ n, "..." }, y); - 104 464 : return states; - 105 3888 : } - 106 168 : torch::Tensor Proposal::prepareX(torch::Tensor& X) - 107 : { - 108 168 : auto Xtd = torch::zeros_like(X, torch::kInt32); - 109 1376 : for (int i = 0; i < X.size(0); ++i) { - 110 1208 : auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1)); - 111 1208 : auto Xd = discretizers[pFeatures[i]]->transform(Xt); - 112 3624 : Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32)); - 113 1208 : } - 114 336 : return Xtd; - 115 1376 : } - 116 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPODE::graph(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const | - -68 | - - -
bayesnet::SPODE::buildModel(at::Tensor const&) | - -1016 | - - -
bayesnet::SPODE::SPODE(int) | - -1124 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPODE::SPODE(int) | - -1124 | - - -
bayesnet::SPODE::buildModel(at::Tensor const&) | - -1016 | - - -
bayesnet::SPODE::graph(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const | - -68 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include "SPODE.h" - 8 : - 9 : namespace bayesnet { - 10 : - 11 1124 : SPODE::SPODE(int root) : Classifier(Network()), root(root) {} - 12 : - 13 1016 : void SPODE::buildModel(const torch::Tensor& weights) - 14 : { - 15 : // 0. Add all nodes to the model - 16 1016 : addNodes(); - 17 : // 1. Add edges from the class node to all other nodes - 18 : // 2. Add edges from the root node to all other nodes - 19 25680 : for (int i = 0; i < static_cast<int>(features.size()); ++i) { - 20 24664 : model.addEdge(className, features[i]); - 21 24664 : if (i != root) { - 22 23648 : model.addEdge(features[root], features[i]); - 23 : } - 24 : } - 25 1016 : } - 26 68 : std::vector<std::string> SPODE::graph(const std::string& name) const - 27 : { - 28 68 : return model.graph(name); - 29 : } - 30 : - 31 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPODE::~SPODE() | - -1836 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPODE::~SPODE() | - -1836 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef SPODE_H - 8 : #define SPODE_H - 9 : #include "Classifier.h" - 10 : - 11 : namespace bayesnet { - 12 : class SPODE : public Classifier { - 13 : private: - 14 : int root; - 15 : protected: - 16 : void buildModel(const torch::Tensor& weights) override; - 17 : public: - 18 : explicit SPODE(int root); - 19 1836 : virtual ~SPODE() = default; - 20 : std::vector<std::string> graph(const std::string& name = "SPODE") const override; - 21 : }; - 22 : } - 23 : #endif -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include "SPODELd.h" - 8 : - 9 : namespace bayesnet { - 10 220 : SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} - 11 168 : SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) - 12 : { - 13 168 : checkInput(X_, y_); - 14 168 : Xf = X_; - 15 168 : y = y_; - 16 168 : return commonFit(features_, className_, states_); - 17 : } - 18 : - 19 8 : SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) - 20 : { - 21 8 : if (!torch::is_floating_point(dataset)) { - 22 4 : throw std::runtime_error("Dataset must be a floating point tensor"); - 23 : } - 24 16 : Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); - 25 12 : y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); - 26 4 : return commonFit(features_, className_, states_); - 27 12 : } - 28 : - 29 172 : SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) - 30 : { - 31 172 : features = features_; - 32 172 : className = className_; - 33 : // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y - 34 172 : states = fit_local_discretization(y); - 35 : // We have discretized the input data - 36 : // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network - 37 172 : SPODE::fit(dataset, features, className, states); - 38 172 : states = localDiscretizationProposal(states, model); - 39 172 : return *this; - 40 : } - 41 136 : torch::Tensor SPODELd::predict(torch::Tensor& X) - 42 : { - 43 136 : auto Xt = prepareX(X); - 44 272 : return SPODE::predict(Xt); - 45 136 : } - 46 36 : std::vector<std::string> SPODELd::graph(const std::string& name) const - 47 : { - 48 36 : return SPODE::graph(name); - 49 : } - 50 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPODELd::~SPODELd() | - -320 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPODELd::~SPODELd() | - -320 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef SPODELD_H - 8 : #define SPODELD_H - 9 : #include "SPODE.h" - 10 : #include "Proposal.h" - 11 : - 12 : namespace bayesnet { - 13 : class SPODELd : public SPODE, public Proposal { - 14 : public: - 15 : explicit SPODELd(int root); - 16 320 : virtual ~SPODELd() = default; - 17 : SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; - 18 : SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; - 19 : SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states); - 20 : std::vector<std::string> graph(const std::string& name = "SPODE") const override; - 21 : torch::Tensor predict(torch::Tensor& X) override; - 22 : static inline std::string version() { return "0.0.1"; }; - 23 : }; - 24 : } - 25 : #endif // !SPODELD_H -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPnDE::graph(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const | - -24 | - - -
bayesnet::SPnDE::SPnDE(std::vector<int, std::allocator<int> >) | - -456 | - - -
bayesnet::SPnDE::buildModel(at::Tensor const&) | - -456 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPnDE::SPnDE(std::vector<int, std::allocator<int> >) | - -456 | - - -
bayesnet::SPnDE::buildModel(at::Tensor const&) | - -456 | - - -
bayesnet::SPnDE::graph(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const | - -24 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include "SPnDE.h" - 8 : - 9 : namespace bayesnet { - 10 : - 11 456 : SPnDE::SPnDE(std::vector<int> parents) : Classifier(Network()), parents(parents) {} - 12 : - 13 456 : void SPnDE::buildModel(const torch::Tensor& weights) - 14 : { - 15 : // 0. Add all nodes to the model - 16 456 : addNodes(); - 17 456 : std::vector<int> attributes; - 18 4440 : for (int i = 0; i < static_cast<int>(features.size()); ++i) { - 19 3984 : if (std::find(parents.begin(), parents.end(), i) == parents.end()) { - 20 3072 : attributes.push_back(i); - 21 : } - 22 : } - 23 : // 1. Add edges from the class node to all other nodes - 24 : // 2. Add edges from the parents nodes to all other nodes - 25 3528 : for (const auto& attribute : attributes) { - 26 3072 : model.addEdge(className, features[attribute]); - 27 9216 : for (const auto& root : parents) { - 28 : - 29 6144 : model.addEdge(features[root], features[attribute]); - 30 : } - 31 : } - 32 456 : } - 33 24 : std::vector<std::string> SPnDE::graph(const std::string& name) const - 34 : { - 35 24 : return model.graph(name); - 36 : } - 37 : - 38 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPnDE::~SPnDE() | - -912 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::SPnDE::~SPnDE() | - -912 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef SPnDE_H - 8 : #define SPnDE_H - 9 : #include <vector> - 10 : #include "Classifier.h" - 11 : - 12 : namespace bayesnet { - 13 : class SPnDE : public Classifier { - 14 : public: - 15 : explicit SPnDE(std::vector<int> parents); - 16 912 : virtual ~SPnDE() = default; - 17 : std::vector<std::string> graph(const std::string& name = "SPnDE") const override; - 18 : protected: - 19 : void buildModel(const torch::Tensor& weights) override; - 20 : private: - 21 : std::vector<int> parents; - 22 : - 23 : - 24 : }; - 25 : } - 26 : #endif -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::TAN::graph(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const | - -8 | - - -
bayesnet::TAN::buildModel(at::Tensor const&) | - -52 | - - -
bayesnet::TAN::TAN() | - -188 | - - -
auto bayesnet::TAN::buildModel(at::Tensor const&)::{lambda(auto:1 const&, auto:2 const&)#1}::operator()<std::pair<int, float>, std::pair<int, float> >(std::pair<int, float> const&, std::pair<int, float> const&) const | - -648 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
auto bayesnet::TAN::buildModel(at::Tensor const&)::{lambda(auto:1 const&, auto:2 const&)#1}::operator()<std::pair<int, float>, std::pair<int, float> >(std::pair<int, float> const&, std::pair<int, float> const&) const | - -648 | - - -
bayesnet::TAN::TAN() | - -188 | - - -
bayesnet::TAN::buildModel(at::Tensor const&) | - -52 | - - -
bayesnet::TAN::graph(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const | - -8 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #include "TAN.h" - 8 : - 9 : namespace bayesnet { - 10 188 : TAN::TAN() : Classifier(Network()) {} - 11 : - 12 52 : void TAN::buildModel(const torch::Tensor& weights) - 13 : { - 14 : // 0. Add all nodes to the model - 15 52 : addNodes(); - 16 : // 1. Compute mutual information between each feature and the class and set the root node - 17 : // as the highest mutual information with the class - 18 52 : auto mi = std::vector <std::pair<int, float >>(); - 19 156 : torch::Tensor class_dataset = dataset.index({ -1, "..." }); - 20 356 : for (int i = 0; i < static_cast<int>(features.size()); ++i) { - 21 912 : torch::Tensor feature_dataset = dataset.index({ i, "..." }); - 22 304 : auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights); - 23 304 : mi.push_back({ i, mi_value }); - 24 304 : } - 25 700 : sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); - 26 52 : auto root = mi[mi.size() - 1].first; - 27 : // 2. Compute mutual information between each feature and the class - 28 52 : auto weights_matrix = metrics.conditionalEdge(weights); - 29 : // 3. Compute the maximum spanning tree - 30 52 : auto mst = metrics.maximumSpanningTree(features, weights_matrix, root); - 31 : // 4. Add edges from the maximum spanning tree to the model - 32 304 : for (auto i = 0; i < mst.size(); ++i) { - 33 252 : auto [from, to] = mst[i]; - 34 252 : model.addEdge(features[from], features[to]); - 35 : } - 36 : // 5. Add edges from the class to all features - 37 356 : for (auto feature : features) { - 38 304 : model.addEdge(className, feature); - 39 304 : } - 40 408 : } - 41 8 : std::vector<std::string> TAN::graph(const std::string& title) const - 42 : { - 43 8 : return model.graph(title); - 44 : } - 45 : } -- |
-
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::TAN::~TAN() | - -76 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
Function Name | - -Hit count | - - -
bayesnet::TAN::~TAN() | - -76 | - - -
Generated by: LCOV version 2.0-1 |
LCOV - code coverage report | |||||||||||||||||||||||||
-
|
- |||||||||||||||||||||||||
-Line data Source code- - 1 : // *************************************************************** - 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez - 3 : // SPDX-FileType: SOURCE - 4 : // SPDX-License-Identifier: MIT - 5 : // *************************************************************** - 6 : - 7 : #ifndef TAN_H - 8 : #define TAN_H - 9 : #include "Classifier.h" - 10 : namespace bayesnet { - 11 : class TAN : public Classifier { - 12 : private: - 13 : protected: - 14 : void buildModel(const torch::Tensor& weights) override; - 15 : public: - 16 : TAN(); - 17 76 : virtual ~TAN() = default; - 18 : std::vector<std::string> graph(const std::string& name = "TAN") const override; - 19 : }; - 20 : } - 21 : #endif -- |
-
Generated by: LCOV version 2.0-1 |
[deleted generated LCOV pages: annotated source listing and per-function hit counts for bayesnet/classifiers/TANLd.cc, and annotated source listing for bayesnet/classifiers/TANLd.h]
[deleted generated LCOV index pages for the bayesnet/classifiers directory; the removed report recorded the following per-file coverage]
  Filename        Line coverage (hit/total)   Function coverage (hit/total)
  Classifier.cc   100.0 %  (126/126)          100.0 %  (24/24)
  Classifier.h    100.0 %  (4/4)              100.0 %  (4/4)
  KDB.cc           96.3 %  (52/54)            100.0 %  (5/5)
  KDB.h           100.0 %  (1/1)              100.0 %  (1/1)
  KDBLd.cc        100.0 %  (17/17)            100.0 %  (4/4)
  KDBLd.h         100.0 %  (1/1)              100.0 %  (1/1)
  Proposal.cc      97.7 %  (84/86)            100.0 %  (8/8)
  SPODE.cc        100.0 %  (10/10)            100.0 %  (3/3)
  SPODE.h         100.0 %  (1/1)              100.0 %  (1/1)
  SPODELd.cc      100.0 %  (26/26)            100.0 %  (6/6)
  SPODELd.h       100.0 %  (1/1)              100.0 %  (1/1)
  SPnDE.cc        100.0 %  (14/14)            100.0 %  (3/3)
  SPnDE.h         100.0 %  (1/1)              100.0 %  (1/1)
  TAN.cc          100.0 %  (23/23)            100.0 %  (4/4)
  TAN.h           100.0 %  (1/1)              100.0 %  (1/1)
  TANLd.cc        100.0 %  (17/17)            100.0 %  (4/4)
  TANLd.h         100.0 %  (1/1)              100.0 %  (1/1)
[deleted generated LCOV pages: annotated source listing and per-function hit counts for A2DE.cc]
[deleted generated LCOV page: annotated source listing for A2DE.h]
[deleted generated LCOV pages: annotated source listing and per-function hit counts for AODE.cc]
[deleted generated LCOV page: annotated source listing for AODE.h]
[deleted generated LCOV pages: annotated source listing and per-function hit counts for AODELd.cc]
[deleted generated LCOV page: annotated source listing for AODELd.h]
[deleted generated LCOV pages: annotated source listing and per-function hit counts for BoostAODE.cc]
[deleted generated LCOV page: annotated source listing for BoostAODE.h]
[deleted generated LCOV pages: annotated source listing and per-function hit counts for Ensemble.cc]
[deleted generated LCOV page: annotated source listing for Ensemble.h]
[deleted generated LCOV index pages for the ensembles directory (A2DE, AODE, AODELd, BoostAODE, Ensemble); the removed report recorded the following per-file coverage]
  Filename        Line coverage (hit/total)   Function coverage (hit/total)
  A2DE.cc         100.0 %  (23/23)            100.0 %  (4/4)
  A2DE.h          100.0 %  (1/1)              100.0 %  (1/1)
  AODE.cc         100.0 %  (20/20)            100.0 %  (4/4)
  AODE.h          100.0 %  (1/1)              100.0 %  (1/1)
  AODELd.cc       100.0 %  (24/24)            100.0 %  (5/5)
  AODELd.h        100.0 %  (1/1)              100.0 %  (1/1)
  BoostAODE.cc     98.3 %  (233/237)          100.0 %  (9/9)
  BoostAODE.h     100.0 %  (1/1)              100.0 %  (1/1)
  Ensemble.cc     100.0 %  (155/155)          100.0 %  (25/25)
  Ensemble.h      100.0 %  (5/5)              100.0 %  (3/3)
Function coverage (CFS.cc):

| Function | Hit count |
|---|---|
| bayesnet::CFS::fit() | 48 |
| bayesnet::CFS::computeContinueCondition(const std::vector<int>&) | 220 |
[Deleted LCOV source listing: CFS.cc. CFS::fit() ranks features by symmetrical uncertainty with the label, then greedily adds the candidate that maximizes the CFS merit until computeContinueCondition() stops the search (maxFeatures reached, no candidates left, or five consecutive fully expanded subsets without improvement, per Mark A. Hall's thesis). Coverage: 97.8 % lines (44/45), 100.0 % functions (2/2); the only unhit line is the defensive break for constant features.]
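The CFS listing builds its subset around a merit score that rewards feature-class correlation and penalizes feature-feature redundancy. A small self-contained sketch of that merit, written against precomputed symmetrical-uncertainty values rather than the library's torch-based Metrics helpers (merit_cfs, su_class and su_pair are hypothetical names):

// Sketch of the CFS merit used during the greedy search:
// su_class[i] ~ SU(feature i, class), su_pair[{i,j}] ~ SU(feature i, feature j).
#include <cmath>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

double merit_cfs(const std::vector<int>& selected,
                 const std::vector<double>& su_class,
                 const std::map<std::pair<int, int>, double>& su_pair)
{
    double rcf = 0.0;                             // feature-class correlation (sum)
    for (int f : selected) rcf += su_class[f];
    double rff = 0.0;                             // feature-feature correlation (sum over pairs)
    for (std::size_t a = 0; a < selected.size(); ++a)
        for (std::size_t b = a + 1; b < selected.size(); ++b)
            rff += su_pair.at({ selected[a], selected[b] });
    int n = static_cast<int>(selected.size());
    return rcf / std::sqrt(n + (n * n - n) * rff); // same expression as computeMeritCFS()
}

int main()
{
    std::vector<double> su_class = { 0.6, 0.4, 0.1 };
    std::map<std::pair<int, int>, double> su_pair = { { { 0, 1 }, 0.2 },
                                                      { { 0, 2 }, 0.05 },
                                                      { { 1, 2 }, 0.3 } };
    std::cout << merit_cfs({ 0, 1 }, su_class, su_pair) << "\n";
}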
[Deleted LCOV source listing: CFS.h. Declaration of class CFS : public FeatureSelect with fit() and the private computeContinueCondition() helper. Coverage: 100.0 % lines (4/4), 100.0 % functions (2/2).]
[Deleted LCOV source listing: FCBF.cc. The constructor rejects thresholds below 1e-7; FCBF::fit() ranks features by symmetrical uncertainty with the label, then walks the ranking, discarding any later feature whose SU with the current feature is at least its SU with the label, and stops below the threshold or at maxFeatures. Coverage: 100.0 % lines (26/26), 100.0 % functions (2/2).]
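The FCBF listing implements the fast correlation-based filter: rank by SU with the label, then let each accepted feature knock out the later features it makes redundant. A standalone sketch of that loop over a precomputed SU matrix (fcbf, su_class and su are hypothetical names, not the library's API):

// Sketch of the FCBF selection loop using precomputed symmetrical uncertainty:
// su_class[i] = SU(feature i, class); su[i][j] = SU(feature i, feature j).
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

std::vector<int> fcbf(std::vector<double> su_class,
                      const std::vector<std::vector<double>>& su,
                      double threshold, std::size_t max_features)
{
    std::vector<int> order(su_class.size());
    std::iota(order.begin(), order.end(), 0);
    std::sort(order.begin(), order.end(),                     // descending SU with the class
              [&](int a, int b) { return su_class[a] > su_class[b]; });
    std::vector<int> selected;
    for (std::size_t i = 0; i < order.size(); ++i) {
        int f = order[i];
        if (su_class[f] == 0.0) continue;                     // already marked redundant
        if (su_class[f] < threshold) break;                   // remaining features are too weak
        for (std::size_t j = i + 1; j < order.size(); ++j) {  // drop features f makes redundant
            int g = order[j];
            if (su[f][g] >= su_class[g]) su_class[g] = 0.0;
        }
        selected.push_back(f);
        if (selected.size() == max_features) break;
    }
    return selected;
}

int main()
{
    std::vector<double> su_class = { 0.7, 0.65, 0.2 };
    std::vector<std::vector<double>> su = { { 1.0, 0.8, 0.1 },
                                            { 0.8, 1.0, 0.1 },
                                            { 0.1, 0.1, 1.0 } };
    for (int f : fcbf(su_class, su, 1e-7, 3)) std::cout << f << " ";   // prints "0 2"
}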
Function coverage (FCBF.h): bayesnet::FCBF::~FCBF(), 56 hits.
[Deleted LCOV source listing: FCBF.h. Declaration of class FCBF : public FeatureSelect with fit() and a private threshold. Coverage: 100.0 % lines (1/1), 100.0 % functions (1/1).]
Function coverage (FeatureSelect.cc):

| Function | Hit count |
|---|---|
| bayesnet::FeatureSelect::FeatureSelect(const at::Tensor&, const std::vector<std::string>&, const std::string&, int, int, const at::Tensor&) | 221 |
| bayesnet::FeatureSelect::computeMeritCFS() | 1239 |
| bayesnet::FeatureSelect::computeSuFeatures(int, int) | 7921 |
| bayesnet::FeatureSelect::computeSuLabels() | 151 |
| bayesnet::FeatureSelect::getFeatures() const | 171 |
| bayesnet::FeatureSelect::getScores() const | 156 |
| bayesnet::FeatureSelect::initialize() | 151 |
| bayesnet::FeatureSelect::symmetricalUncertainty(int, int) | 3728 |
[Deleted LCOV source listing: FeatureSelect.cc. Base class shared by CFS, FCBF and IWSS: symmetricalUncertainty() normalizes mutual information by the entropies of the two variables, computeSuLabels() and computeSuFeatures() cache SU values, computeMeritCFS() combines them into the CFS merit, and getFeatures()/getScores() guard against use before fit(). Coverage: 100.0 % lines (44/44), 100.0 % functions (8/8).]
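The heart of the deleted FeatureSelect.cc is the symmetrical uncertainty SU(x, y) = 2 * I(x; y) / (H(x) + H(y)). A minimal unweighted sketch computed from plain discrete samples (the library version additionally applies sample weights and works on torch tensors; symmetrical_uncertainty is an illustrative name):

// Sketch: symmetrical uncertainty of two discrete variables from raw samples.
#include <cmath>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

static double entropy_from_counts(const std::map<int, int>& counts, int n)
{
    double h = 0.0;
    for (const auto& kv : counts) {
        double p = static_cast<double>(kv.second) / n;
        h -= p * std::log2(p);
    }
    return h;
}

double symmetrical_uncertainty(const std::vector<int>& x, const std::vector<int>& y)
{
    int n = static_cast<int>(x.size());
    std::map<int, int> cx, cy;
    std::map<std::pair<int, int>, int> cxy;
    for (int i = 0; i < n; ++i) { cx[x[i]]++; cy[y[i]]++; cxy[{ x[i], y[i] }]++; }
    double hx = entropy_from_counts(cx, n), hy = entropy_from_counts(cy, n);
    double hxy = 0.0;                              // joint entropy H(x, y)
    for (const auto& kv : cxy) {
        double p = static_cast<double>(kv.second) / n;
        hxy -= p * std::log2(p);
    }
    double mi = hx + hy - hxy;                     // mutual information I(x; y)
    return (hx + hy) > 0.0 ? 2.0 * mi / (hx + hy) : 0.0;
}

int main()
{
    std::vector<int> x = { 0, 0, 1, 1 }, y = { 0, 0, 1, 1 };
    std::cout << symmetrical_uncertainty(x, y) << "\n";   // identical variables -> 1
}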
Function coverage (FeatureSelect.h): bayesnet::FeatureSelect::~FeatureSelect(), 111 hits.
[Deleted LCOV source listing: FeatureSelect.h. Declaration of the abstract FeatureSelect : public Metrics base (pure virtual fit(), SU caches, selectedFeatures/selectedScores, fitted flag). Coverage: 100.0 % lines (1/1), 100.0 % functions (1/1).]
[Deleted LCOV source listing: IWSS.cc. The constructor requires the threshold to lie in [0, 0.5]; IWSS::fit() seeds the selection with the two top-ranked features and then accepts each next candidate while the CFS merit improves or its relative drop stays below the threshold, stopping otherwise or at maxFeatures. Coverage: 96.7 % lines (29/30), 100.0 % functions (2/2).]
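The IWSS listing grows the selection incrementally and accepts a candidate when the CFS merit either improves or worsens by less than the relative threshold. A sketch of just that acceptance rule, with the merit computation abstracted behind a caller-supplied function (iwss and merit_with are hypothetical names, not the library's API):

// Sketch of the IWSS acceptance rule over a ranked feature list.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

template <typename MeritFn>
std::vector<int> iwss(const std::vector<int>& ranked, double threshold,
                      std::size_t max_features, MeritFn merit_with)
{
    std::vector<int> selected(ranked.begin(), ranked.begin() + 2); // seed with the two best
    double merit = merit_with(selected);
    for (std::size_t i = 2; i < ranked.size(); ++i) {
        selected.push_back(ranked[i]);
        double merit_new = merit_with(selected);
        double delta = merit != 0.0 ? std::abs(merit - merit_new) / merit : 0.0;
        if (merit_new > merit || delta < threshold) {
            merit = std::max(merit, merit_new);                    // accept the candidate
        } else {
            selected.pop_back();                                   // reject and stop
            break;
        }
        if (selected.size() == max_features) break;
    }
    return selected;
}

int main()
{
    auto merit = [](const std::vector<int>& s) { return 1.0 - 0.01 * s.size(); }; // toy merit
    for (int f : iwss({ 3, 1, 4, 2 }, 0.5, 4, merit)) std::cout << f << " ";      // prints "3 1 4 2"
}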
Function coverage (IWSS.h): bayesnet::IWSS::~IWSS(), 48 hits.
[Deleted LCOV source listing: IWSS.h. Declaration of class IWSS : public FeatureSelect with fit() and a private threshold. Coverage: 100.0 % lines (1/1), 100.0 % functions (1/1).]
Coverage summary (feature selection):

| Filename | Line coverage | Lines total | Lines hit | Function coverage | Functions total | Functions hit |
|---|---|---|---|---|---|---|
| CFS.cc | 97.8 % | 45 | 44 | 100.0 % | 2 | 2 |
| CFS.h | 100.0 % | 4 | 4 | 100.0 % | 2 | 2 |
| FCBF.cc | 100.0 % | 26 | 26 | 100.0 % | 2 | 2 |
| FCBF.h | 100.0 % | 1 | 1 | 100.0 % | 1 | 1 |
| FeatureSelect.cc | 100.0 % | 44 | 44 | 100.0 % | 8 | 8 |
| FeatureSelect.h | 100.0 % | 1 | 1 | 100.0 % | 1 | 1 |
| IWSS.cc | 96.7 % | 30 | 29 | 100.0 % | 2 | 2 |
| IWSS.h | 100.0 % | 1 | 1 | 100.0 % | 1 | 1 |
Coverage summary (BaseClassifier.h): 100.0 % lines (1/1), 100.0 % functions (1/1).
[Deleted LCOV source listing: Network.cc. The Bayesian network core: addNode()/addEdge() with DFS cycle detection, checkFitData() input validation, fit() overloads that assemble the samples tensor and launch one thread per node to compute its CPT with Laplace smoothing, predict()/predict_proba() over tensors and vectors, predict_sample() and exactInference() enumerating class states in threads and normalizing, plus show(), graph(), topological_sort() and dump_cpt().]
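The prediction path in the deleted Network.cc reduces to exact inference by enumeration: complete the evidence with each class state, multiply the factor value of every node, and normalize. A single-threaded sketch of that step with the joint factor abstracted behind a callable (exact_inference and joint_factor are hypothetical names; the real code multiplies per-node CPT entries in parallel threads):

// Sketch: posterior over class states by enumeration and normalization.
#include <functional>
#include <iostream>
#include <map>
#include <numeric>
#include <string>
#include <vector>

std::vector<double> exact_inference(
    std::map<std::string, int> evidence, const std::string& class_name, int class_states,
    const std::function<double(const std::map<std::string, int>&)>& joint_factor)
{
    std::vector<double> result(class_states, 0.0);
    for (int c = 0; c < class_states; ++c) {
        evidence[class_name] = c;              // complete the evidence with a class value
        result[c] = joint_factor(evidence);    // product of every node's CPT entry
    }
    double sum = std::accumulate(result.begin(), result.end(), 0.0);
    for (auto& v : result) v /= sum;           // normalize to a probability distribution
    return result;
}

int main()
{
    // toy joint factor: a naive-Bayes-like product P(class) * P(x | class)
    auto factor = [](const std::map<std::string, int>& ev) {
        double prior[] = { 0.6, 0.4 };
        double lik[2][2] = { { 0.9, 0.1 }, { 0.2, 0.8 } };   // P(x | class)
        return prior[ev.at("class")] * lik[ev.at("class")][ev.at("x")];
    };
    auto post = exact_inference({ { "x", 1 } }, "class", 2, factor);
    std::cout << post[0] << " " << post[1] << "\n";          // roughly 0.157895 0.842105
}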
Function coverage (Network.h): bayesnet::Network::~Network(), 4024 hits.
[Deleted LCOV source listing: Network.h. Declaration of class Network: the node map, fit/predict/predict_proba/score API, topological_sort(), graph(), dump_cpt(), version(), and the private inference helpers (predict_sample, exactInference, computeFactor, completeFit, checkFitData, setStates).]
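The cycle check behind Network::addEdge(), summarized in the Network.cc listing above, tentatively wires the edge and then runs a depth-first search with a visited set and a recursion stack, rolling the edge back if a cycle appears. A sketch of that check on a plain adjacency map (Graph, add_edge and is_cyclic are illustrative names, not the library's Node-based structures):

// Sketch: reject an edge whose addition would close a directed cycle.
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

using Graph = std::map<std::string, std::vector<std::string>>;

static bool is_cyclic(const Graph& g, const std::string& node,
                      std::set<std::string>& visited, std::set<std::string>& rec_stack)
{
    if (!visited.count(node)) {
        visited.insert(node);
        rec_stack.insert(node);
        auto it = g.find(node);
        if (it != g.end())
            for (const auto& child : it->second) {
                if (!visited.count(child) && is_cyclic(g, child, visited, rec_stack)) return true;
                if (rec_stack.count(child)) return true;       // back edge -> cycle
            }
    }
    rec_stack.erase(node);                                     // leave the recursion stack
    return false;
}

void add_edge(Graph& g, const std::string& parent, const std::string& child)
{
    g[parent].push_back(child);                                // add tentatively
    std::set<std::string> visited, rec_stack;
    if (is_cyclic(g, child, visited, rec_stack)) {
        g[parent].pop_back();                                  // roll back the edge
        throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
    }
}

int main()
{
    Graph g;
    add_edge(g, "class", "A");
    add_edge(g, "A", "B");
    try { add_edge(g, "B", "class"); } catch (const std::exception& e) { std::cout << e.what() << "\n"; }
}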
[deleted generated LCOV page: annotated source view of bayesnet/network/Node.cc — parent/child bookkeeping, the MinFill elimination heuristic (scored as the number of pairs of a node's neighbours taken two at a time), CPT estimation with Laplace smoothing, factor evaluation and graphviz output; 100.0 % line coverage (88/88 lines)]
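To make the MinFill idea concrete, here is a minimal standalone sketch in standard C++; the minFillScore function and the use of std::set are illustrative only, not the BayesNet API. The elimination score of a node is approximated by the number of unordered pairs among its neighbours.

#include <cstddef>
#include <iostream>
#include <set>
#include <string>

// Hypothetical helper, not part of the BayesNet API: the min-fill score of a node,
// approximated (as in the deleted listing) by the number of unordered neighbour pairs.
unsigned minFillScore(const std::set<std::string>& neighbours)
{
    std::size_t n = neighbours.size();
    return static_cast<unsigned>(n * (n - 1) / 2);
}

int main()
{
    // A node with neighbours A, B and C yields three candidate fill-in edges.
    std::cout << minFillScore({ "A", "B", "C" }) << "\n";   // prints 3
    return 0;
}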
[deleted generated LCOV pages: coverage index for the bayesnet/network directory, repeated in the report's several sort orders]

  Filename      Line coverage            Function coverage
                Rate      Total   Hit    Rate      Total   Hit
  Network.h     100.0 %       1     1    100.0 %       1     1
  Node.cc       100.0 %      88    88    100.0 %      20    20
  Network.cc    100.0 %     295   295    100.0 %      40    40
[deleted generated LCOV page: annotated source view of bayesnet/utils/BayesMetrics.cc — the Metrics implementation: SelectKBestWeighted feature ranking, conditionalEdge (class-conditional mutual-information matrix), weighted entropy, conditional entropy H(Y|X) = sum_{x in X} p(x) H(Y|X=x), mutual information I(X;Y) = H(Y) - H(Y|X), and maximumSpanningTree via Kruskal; 98.2 % line coverage (112/114 lines)]
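As a worked illustration of the weighted entropy and mutual-information quantities summarised above, here is a self-contained sketch in standard C++; the helper names (entropy, conditionalEntropy) and the whole example are ours for illustration, not the Metrics API. It computes H(X), H(X|Y) and I(X;Y) = H(X) - H(X|Y) from two discrete vectors and per-sample weights.

#include <cmath>
#include <iostream>
#include <map>
#include <vector>

// Weighted empirical entropy H(X) of a discrete vector (illustrative helper).
double entropy(const std::vector<int>& x, const std::vector<double>& w)
{
    std::map<int, double> counts;
    double total = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
        counts[x[i]] += w[i];
        total += w[i];
    }
    double h = 0.0;
    for (const auto& [value, weight] : counts) {
        double p = weight / total;
        if (p > 0) h -= p * std::log(p);
    }
    return h;
}

// Weighted conditional entropy H(X|Y) = sum_y p(y) H(X|Y=y) (illustrative helper).
double conditionalEntropy(const std::vector<int>& x, const std::vector<int>& y,
                          const std::vector<double>& w)
{
    std::map<int, double> yCounts;
    std::map<int, std::map<int, double>> joint;
    double total = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
        yCounts[y[i]] += w[i];
        joint[y[i]][x[i]] += w[i];
        total += w[i];
    }
    double h = 0.0;
    for (const auto& [yValue, yWeight] : yCounts) {
        double hGivenY = 0.0;
        for (const auto& [xValue, xWeight] : joint[yValue]) {
            double p = xWeight / yWeight;
            if (p > 0) hGivenY -= p * std::log(p);
        }
        h += (yWeight / total) * hGivenY;
    }
    return h;
}

int main()
{
    std::vector<int> x = { 0, 0, 1, 1 };
    std::vector<int> y = { 0, 0, 1, 1 };
    std::vector<double> w(4, 1.0);
    // X and Y are identical here, so I(X;Y) = H(X) = ln 2 ~ 0.693.
    std::cout << entropy(x, w) - conditionalEntropy(x, y, w) << "\n";
    return 0;
}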
[deleted generated LCOV page: annotated source view of bayesnet/utils/BayesMetrics.h — the Metrics class declaration, including the doCombinations and pop_first templates; 100.0 % line coverage (13/13 lines)]
[deleted generated LCOV page: annotated source view of bayesnet/utils/Mst.cc — Kruskal's algorithm over the complete weight graph (edges sorted by decreasing weight, union-find to reject cycles), followed by a reorder step that turns the undirected tree into a DAG rooted at the requested node; 94.1 % line coverage (64/68 lines)]
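The same maximum-spanning-tree construction can be sketched with nothing but the standard library. This is an illustrative re-implementation under names of our own (UnionFind, maximumSpanningTree), not the library's Graph/MST classes: sort the edges by decreasing weight and greedily keep any edge that joins two different components.

#include <algorithm>
#include <iostream>
#include <numeric>
#include <tuple>
#include <vector>

// Minimal union-find with path compression (illustrative, not the library's Graph).
struct UnionFind {
    std::vector<int> parent;
    explicit UnionFind(int n) : parent(n) { std::iota(parent.begin(), parent.end(), 0); }
    int find(int i) { return parent[i] == i ? i : parent[i] = find(parent[i]); }
    void unite(int a, int b) { parent[find(a)] = find(b); }
};

// Kruskal adapted to a *maximum* spanning tree: sort edges by decreasing weight
// and keep any edge that connects two different components.
std::vector<std::tuple<float, int, int>>
maximumSpanningTree(int n, std::vector<std::tuple<float, int, int>> edges)
{
    std::stable_sort(edges.begin(), edges.end(),
                     [](const auto& a, const auto& b) { return std::get<0>(a) > std::get<0>(b); });
    UnionFind uf(n);
    std::vector<std::tuple<float, int, int>> tree;
    for (const auto& [weight, u, v] : edges) {
        if (uf.find(u) != uf.find(v)) {
            tree.emplace_back(weight, u, v);
            uf.unite(u, v);
        }
    }
    return tree;
}

int main()
{
    // Complete graph on three nodes; the two heaviest edges form the tree.
    auto tree = maximumSpanningTree(3, { std::make_tuple(0.9f, 0, 1),
                                         std::make_tuple(0.5f, 0, 2),
                                         std::make_tuple(0.7f, 1, 2) });
    for (const auto& [weight, u, v] : tree)
        std::cout << u << " - " << v << " (" << weight << ")\n";
    return 0;
}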
[deleted generated LCOV page: function coverage for bayesnet/utils/Mst.h]

  Function name                 Hit count
  bayesnet::Graph::get_mst()          148
[deleted generated LCOV page: annotated source view of bayesnet/utils/Mst.h — the MST and Graph class declarations; 100.0 % line coverage]
[deleted generated LCOV pages: function coverage for bayesnet/utils/bayesnetUtils.cc, repeated in two sort orders]

  Function name                                                              Hit count
  bayesnet::argsort(std::vector<double>&)                                          203
  bayesnet::argsort(std::vector<double>&)::{lambda(int, int)#1}::operator()       4041
  bayesnet::tensorToVectorDouble(at::Tensor&)                                        32
  bayesnet::vectorToTensor(std::vector<std::vector<int>>&, bool)                     40
[deleted generated LCOV page: annotated source view of bayesnet/utils/bayesnetUtils.cc — the argsort, tensorToVectorDouble and vectorToTensor helpers; 100.0 % line coverage (25/25 lines)]
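For completeness, the index-sorting helper seen above can be expressed as a small standalone snippet (illustrative names, standard library only): it returns the indices that order a vector of scores in descending order.

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

// Returns the indices that order the scores in descending order (illustrative helper).
std::vector<int> argsortDescending(const std::vector<double>& scores)
{
    std::vector<int> indices(scores.size());
    std::iota(indices.begin(), indices.end(), 0);
    std::sort(indices.begin(), indices.end(),
              [&scores](int i, int j) { return scores[i] > scores[j]; });
    return indices;
}

int main()
{
    for (int i : argsortDescending({ 0.2, 0.9, 0.5 }))
        std::cout << i << ' ';   // prints: 1 2 0
    std::cout << '\n';
    return 0;
}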
[deleted generated LCOV pages: coverage index for the bayesnet/utils directory, repeated in the report's several sort orders]

  Filename          Line coverage            Function coverage
                    Rate      Total   Hit    Rate      Total   Hit
  Mst.h             100.0 %       1     1    100.0 %       1     1
  BayesMetrics.h    100.0 %      13    13    100.0 %       4     4
  bayesnetUtils.cc  100.0 %      25    25    100.0 %       4     4
  Mst.cc             94.1 %      68    64    100.0 %      10    10
  BayesMetrics.cc    98.2 %     114   112    100.0 %      11    11
[deleted generated LCOV pages: top-level coverage index by directory, repeated in the report's several sort orders]

  Directory                    Line coverage            Function coverage
                               Rate      Total   Hit    Rate      Total   Hit
  bayesnet                     100.0 %       1     1    100.0 %       1     1
  bayesnet/classifiers          99.0 %     384   380    100.0 %      72    72
  bayesnet/ensembles            99.1 %     468   464    100.0 %      54    54
  bayesnet/feature_selection    98.7 %     152   150    100.0 %      19    19
  bayesnet/network             100.0 %     384   384    100.0 %      61    61
  bayesnet/utils                97.3 %     221   215    100.0 %      30    30