LCOV - coverage.info - bayesnet/classifiers/Classifier.cc

LCOV - code coverage report

Current view:	top level - bayesnet/classifiers - Classifier.cc (source / functions)		Coverage	Total	Hit
Test:	coverage.info	Lines:	100.0 %	126	126
Test Date:	2024-04-30 13:59:18	Functions:	100.0 %	24	24

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include <sstream>
       8              : #include "bayesnet/utils/bayesnetUtils.h"
       9              : #include "Classifier.h"
      10              : 
      11              : namespace bayesnet {
      12         2658 :     Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
      13              :     const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
      14         1932 :     Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
      15              :     {
      16         1932 :         this->features = features;
      17         1932 :         this->className = className;
      18         1932 :         this->states = states;
      19         1932 :         m = dataset.size(1);
      20         1932 :         n = features.size();
      21         1932 :         checkFitParameters();
      22         1884 :         auto n_classes = states.at(className).size();
      23         1884 :         metrics = Metrics(dataset, features, className, n_classes);
      24         1884 :         model.initialize();
      25         1884 :         buildModel(weights);
      26         1884 :         trainModel(weights);
      27         1860 :         fitted = true;
      28         1860 :         return *this;
      29              :     }
      30          486 :     void Classifier::buildDataset(torch::Tensor& ytmp)
      31              :     {
      32              :         try {
      33          486 :             auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
      34         1506 :             dataset = torch::cat({ dataset, yresized }, 0);
      35          486 :         }
      36           24 :         catch (const std::exception& e) {
      37           24 :             std::stringstream oss;
      38           24 :             oss << "* Error in X and y dimensions *\n";
      39           24 :             oss << "X dimensions: " << dataset.sizes() << "\n";
      40           24 :             oss << "y dimensions: " << ytmp.sizes();
      41           24 :             throw std::runtime_error(oss.str());
      42           48 :         }
      43          972 :     }
      44         1680 :     void Classifier::trainModel(const torch::Tensor& weights)
      45              :     {
      46         1680 :         model.fit(dataset, weights, features, className, states);
      47         1680 :     }
      48              :     // X is nxm where n is the number of features and m the number of samples
      49          192 :     Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
      50              :     {
      51          192 :         dataset = X;
      52          192 :         buildDataset(y);
      53          180 :         const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
      54          312 :         return build(features, className, states, weights);
      55          180 :     }
      56              :     // X is nxm where n is the number of features and m the number of samples
      57          180 :     Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
      58              :     {
      59          180 :         dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
      60         1254 :         for (int i = 0; i < X.size(); ++i) {
      61         4296 :             dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
      62              :         }
      63          180 :         auto ytmp = torch::tensor(y, torch::kInt32);
      64          180 :         buildDataset(ytmp);
      65          168 :         const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
      66          312 :         return build(features, className, states, weights);
      67         1278 :     }
      68          594 :     Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
      69              :     {
      70          594 :         this->dataset = dataset;
      71          594 :         const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
      72         1188 :         return build(features, className, states, weights);
      73          594 :     }
      74          990 :     Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
      75              :     {
      76          990 :         this->dataset = dataset;
      77          990 :         return build(features, className, states, weights);
      78              :     }
      79         1932 :     void Classifier::checkFitParameters()
      80              :     {
      81         1932 :         if (torch::is_floating_point(dataset)) {
      82           12 :             throw std::invalid_argument("dataset (X, y) must be of type Integer");
      83              :         }
      84         1920 :         if (dataset.size(0) - 1 != features.size()) {
      85           12 :             throw std::invalid_argument("Classifier: X " + std::to_string(dataset.size(0) - 1) + " and features " + std::to_string(features.size()) + " must have the same number of features");
      86              :         }
      87         1908 :         if (states.find(className) == states.end()) {
      88           12 :             throw std::invalid_argument("class name not found in states");
      89              :         }
      90        42624 :         for (auto feature : features) {
      91        40740 :             if (states.find(feature) == states.end()) {
      92           12 :                 throw std::invalid_argument("feature [" + feature + "] not found in states");
      93              :             }
      94        40740 :         }
      95         1884 :     }
      96         2550 :     torch::Tensor Classifier::predict(torch::Tensor& X)
      97              :     {
      98         2550 :         if (!fitted) {
      99           24 :             throw std::logic_error(CLASSIFIER_NOT_FITTED);
     100              :         }
     101         2526 :         return model.predict(X);
     102              :     }
     103           24 :     std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
     104              :     {
     105           24 :         if (!fitted) {
     106           12 :             throw std::logic_error(CLASSIFIER_NOT_FITTED);
     107              :         }
     108           12 :         auto m_ = X[0].size();
     109           12 :         auto n_ = X.size();
     110           12 :         std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
     111           60 :         for (auto i = 0; i < n_; i++) {
     112           96 :             Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
     113              :         }
     114           12 :         auto yp = model.predict(Xd);
     115           24 :         return yp;
     116           12 :     }
     117         2226 :     torch::Tensor Classifier::predict_proba(torch::Tensor& X)
     118              :     {
     119         2226 :         if (!fitted) {
     120           12 :             throw std::logic_error(CLASSIFIER_NOT_FITTED);
     121              :         }
     122         2214 :         return model.predict_proba(X);
     123              :     }
     124          390 :     std::vector<std::vector<double>> Classifier::predict_proba(std::vector<std::vector<int>>& X)
     125              :     {
     126          390 :         if (!fitted) {
     127           12 :             throw std::logic_error(CLASSIFIER_NOT_FITTED);
     128              :         }
     129          378 :         auto m_ = X[0].size();
     130          378 :         auto n_ = X.size();
     131          378 :         std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
     132              :         // Convert to nxm vector
     133         3240 :         for (auto i = 0; i < n_; i++) {
     134         5724 :             Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
     135              :         }
     136          378 :         auto yp = model.predict_proba(Xd);
     137          756 :         return yp;
     138          378 :     }
     139          168 :     float Classifier::score(torch::Tensor& X, torch::Tensor& y)
     140              :     {
     141          168 :         torch::Tensor y_pred = predict(X);
     142          312 :         return (y_pred == y).sum().item<float>() / y.size(0);
     143          156 :     }
     144           24 :     float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
     145              :     {
     146           24 :         if (!fitted) {
     147           12 :             throw std::logic_error(CLASSIFIER_NOT_FITTED);
     148              :         }
     149           12 :         return model.score(X, y);
     150              :     }
     151           36 :     std::vector<std::string> Classifier::show() const
     152              :     {
     153           36 :         return model.show();
     154              :     }
     155         1680 :     void Classifier::addNodes()
     156              :     {
     157              :         // Add all nodes to the network
     158        39648 :         for (const auto& feature : features) {
     159        37968 :             model.addNode(feature);
     160              :         }
     161         1680 :         model.addNode(className);
     162         1680 :     }
     163          282 :     int Classifier::getNumberOfNodes() const
     164              :     {
     165              :         // Features does not include class
     166          282 :         return fitted ? model.getFeatures().size() : 0;
     167              :     }
     168          282 :     int Classifier::getNumberOfEdges() const
     169              :     {
     170          282 :         return fitted ? model.getNumEdges() : 0;
     171              :     }
     172           36 :     int Classifier::getNumberOfStates() const
     173              :     {
     174           36 :         return fitted ? model.getStates() : 0;
     175              :     }
     176          510 :     int Classifier::getClassNumStates() const
     177              :     {
     178          510 :         return fitted ? model.getClassNumStates() : 0;
     179              :     }
     180            6 :     std::vector<std::string> Classifier::topological_order()
     181              :     {
     182            6 :         return model.topological_sort();
     183              :     }
     184            6 :     std::string Classifier::dump_cpt() const
     185              :     {
     186            6 :         return model.dump_cpt();
     187              :     }
     188          126 :     void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
     189              :     {
     190          126 :         if (!hyperparameters.empty()) {
     191           12 :             throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
     192              :         }
     193          114 :     }
     194              : }

Generated by: LCOV version 2.0-1