mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-18 09:05:55 +00:00
Compare commits
2 Commits
boostAODE
...
complete_c
Author | SHA1 | Date | |
---|---|---|---|
7f5ea1ab1e
|
|||
168cc368ee
|
16
MANIFEST.in
16
MANIFEST.in
@@ -1,5 +1,13 @@
|
||||
include README.md LICENSE
|
||||
include bayesclass/FeatureSelect.h
|
||||
include bayesclass/Node.h
|
||||
include bayesclass/Network.h
|
||||
include bayesclass/Metrics.hpp
|
||||
include bayesclass/cpp/FeatureSelect.h
|
||||
include bayesclass/cpp/Node.h
|
||||
include bayesclass/cpp/Mst.h
|
||||
include bayesclass/cpp/Network.h
|
||||
include bayesclass/cpp/Metrics.hpp
|
||||
include bayesclass/cpp/BaseClassifier.h
|
||||
include bayesclass/cpp/Ensemble.h
|
||||
include bayesclass/cpp/TAN.h
|
||||
include bayesclass/cpp/KDB.h
|
||||
include bayesclass/cpp/SPODE.h
|
||||
include bayesclass/cpp/AODE.h
|
||||
include bayesclass/cpp/utils.h
|
File diff suppressed because it is too large
Load Diff
@@ -2,9 +2,10 @@
|
||||
# cython: language_level = 3
|
||||
from libcpp.vector cimport vector
|
||||
from libcpp.string cimport string
|
||||
from libcpp.map cimport map
|
||||
import numpy as np
|
||||
|
||||
cdef extern from "Network.h" namespace "bayesnet":
|
||||
cdef extern from "cpp/Network.h" namespace "bayesnet":
|
||||
cdef cppclass Network:
|
||||
Network(float, float) except +
|
||||
void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string)
|
||||
@@ -58,7 +59,7 @@ cdef class BayesNetwork:
|
||||
def __reduce__(self):
|
||||
return (BayesNetwork, ())
|
||||
|
||||
cdef extern from "Metrics.hpp" namespace "bayesnet":
|
||||
cdef extern from "cpp/Metrics.hpp" namespace "bayesnet":
|
||||
cdef cppclass Metrics:
|
||||
Metrics(vector[vector[int]], vector[int], vector[string]&, string&, int) except +
|
||||
vector[float] conditionalEdgeWeights()
|
||||
@@ -76,3 +77,101 @@ cdef class CMetrics:
|
||||
def __reduce__(self):
|
||||
return (CMetrics, ())
|
||||
|
||||
# Declares the C++ TAN classifier from cpp/TAN.h under the Cython-side name CTAN.
# NOTE(review): without an explicit C name this resolves to bayesnet::CTAN; the
# sibling headers in this change declare bayesnet::KDB and bayesnet::AODE, so
# confirm that TAN.h really exports a class called CTAN.
cdef extern from "cpp/TAN.h" namespace "bayesnet":
    cdef cppclass CTAN:
        # Construction may throw; "except +" turns that into a Python exception.
        CTAN() except +

        # Training entry point: data columns, labels, feature names, class name, states map.
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)

        # Inference and evaluation.
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)

        # Textual description of the fitted network.
        vector[string] graph()
|
||||
|
||||
# Binds the C++ class bayesnet::KDB (cpp/KDB.h) to the Cython-side name CKDB.
# The explicit C name is required: KDB.h declares KDB, not CKDB, so the default
# "namespace::CythonName" mapping would reference a symbol that does not exist.
cdef extern from "cpp/KDB.h" namespace "bayesnet":
    cdef cppclass CKDB "bayesnet::KDB":
        # Only k is passed; theta keeps the default declared in KDB.h.
        CKDB(int) except +
        # "except +" on the methods converts C++ exceptions (for example the
        # logic_error thrown when predicting before fit) into Python exceptions
        # instead of terminating the interpreter.
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&) except +
        vector[int] predict(vector[vector[int]]&) except +
        # NOTE(review): no predict_proba is visible in BaseClassifier.h in this change;
        # the declaration is harmless while it stays unused.
        vector[vector[double]] predict_proba(vector[vector[int]]&) except +
        float score(const vector[vector[int]]&, const vector[int]&) except +
        vector[string] graph() except +
|
||||
|
||||
# Binds the C++ class bayesnet::AODE (cpp/AODE.h) to the Cython-side name CAODE.
# The explicit C name is required: AODE.h declares AODE, not CAODE, so the default
# "namespace::CythonName" mapping would reference a symbol that does not exist.
cdef extern from "cpp/AODE.h" namespace "bayesnet":
    cdef cppclass CAODE "bayesnet::AODE":
        CAODE() except +
        # "except +" on the methods converts C++ exceptions (for example the
        # logic_error thrown when predicting before fit) into Python exceptions
        # instead of terminating the interpreter.
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&) except +
        vector[int] predict(vector[vector[int]]&) except +
        # NOTE(review): no predict_proba is visible in Ensemble.h in this change;
        # the declaration is harmless while it stays unused.
        vector[vector[double]] predict_proba(vector[vector[int]]&) except +
        float score(const vector[vector[int]]&, const vector[int]&) except +
        vector[string] graph() except +
|
||||
|
||||
cdef class TAN:
    """Python-facing wrapper that owns a single C++ CTAN instance."""
    cdef CTAN *thisptr

    def __cinit__(self):
        self.thisptr = new CTAN()

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Train the wrapped classifier and return self.

        X is sliced column by column before being handed to C++; feature names,
        the class name and the keys of `states` are encoded to bytes. The class
        entry of the states mapping is filled with the unique values found in y.
        """
        columns = [X[:, col] for col in range(X.shape[1])]
        encoded_features = [name.encode() for name in features]
        encoded_states = {}
        for name, values in states.items():
            encoded_states[name.encode()] = values
        class_key = className.encode()
        encoded_states[class_key] = np.unique(y).tolist()
        self.thisptr.fit(columns, y, encoded_features, class_key, encoded_states)
        return self

    def predict(self, X):
        """Return the labels predicted by the C++ classifier for the columns of X."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.predict(columns)

    def score(self, X, y):
        """Return the value of the C++ score() call for X and y."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.score(columns, y)

    def graph(self):
        """Return the lines produced by the C++ graph() call."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickling recreates a fresh, unfitted wrapper; no model state is carried over.
        return (TAN, ())
|
||||
|
||||
cdef class KDB:
    """Python-facing wrapper that owns a single C++ CKDB instance.

    The extension type is named KDB and holds a CKDB pointer, mirroring the
    TAN / CTAN pair: the previous spelling reused the cppclass name CKDB for the
    extension type and referenced an undeclared C++ type KDB.
    """
    cdef CKDB *thisptr
    # Kept so that __reduce__ can rebuild the object with the same k.
    cdef int k

    def __cinit__(self, k):
        self.k = k
        self.thisptr = new CKDB(k)

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Train the wrapped classifier and return self.

        X is sliced column by column before being handed to C++; feature names,
        the class name and the keys of `states` are encoded to bytes. The class
        entry of the states mapping is filled with the unique values found in y.
        """
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self

    def predict(self, X):
        """Return the labels predicted by the C++ classifier for the columns of X."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)

    def score(self, X, y):
        """Return the value of the C++ score() call for X and y."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)

    def graph(self):
        """Return the lines produced by the C++ graph() call."""
        return self.thisptr.graph()

    def __reduce__(self):
        # __cinit__ requires k, so it must be part of the reconstruction arguments;
        # the rebuilt object is unfitted.
        return (KDB, (self.k,))
|
||||
|
||||
cdef class AODE:
    """Python-facing wrapper that owns a single C++ CAODE instance.

    The extension type is named AODE and holds a CAODE pointer, mirroring the
    TAN / CTAN pair: the previous spelling reused the cppclass name CAODE for the
    extension type and referenced an undeclared C++ type AODE.
    """
    cdef CAODE *thisptr

    def __cinit__(self):
        self.thisptr = new CAODE()

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Train the wrapped ensemble and return self.

        X is sliced column by column before being handed to C++; feature names,
        the class name and the keys of `states` are encoded to bytes. The class
        entry of the states mapping is filled with the unique values found in y.
        """
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self

    def predict(self, X):
        """Return the labels predicted by the C++ ensemble for the columns of X."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)

    def score(self, X, y):
        """Return the value of the C++ score() call for X and y."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)

    def graph(self):
        """Return the lines produced by the C++ graph() call."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickling recreates a fresh, unfitted wrapper; no model state is carried over.
        return (AODE, ())
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -5,7 +5,7 @@ from libcpp.string cimport string
|
||||
from libcpp cimport bool
|
||||
|
||||
|
||||
cdef extern from "FeatureSelect.h" namespace "features":
|
||||
cdef extern from "cpp/FeatureSelect.h" namespace "features":
|
||||
ctypedef float precision_t
|
||||
cdef cppclass SelectKBestWeighted:
|
||||
SelectKBestWeighted(vector[vector[int]]&, vector[int]&, vector[precision_t]&, int, bool) except +
|
||||
|
16
bayesclass/cpp/AODE.cc
Normal file
16
bayesclass/cpp/AODE.cc
Normal file
@@ -0,0 +1,16 @@
|
||||
#include "AODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
AODE::AODE() : Ensemble() {}
|
||||
void AODE::train()
|
||||
{
|
||||
models.clear();
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
models.push_back(std::make_unique<SPODE>(i));
|
||||
}
|
||||
}
|
||||
vector<string> AODE::graph(string title)
|
||||
{
|
||||
return Ensemble::graph(title);
|
||||
}
|
||||
}
|
14
bayesclass/cpp/AODE.h
Normal file
14
bayesclass/cpp/AODE.h
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef AODE_H
|
||||
#define AODE_H
|
||||
#include "Ensemble.h"
|
||||
#include "SPODE.h"
|
||||
namespace bayesnet {
|
||||
class AODE : public Ensemble {
|
||||
protected:
|
||||
void train() override;
|
||||
public:
|
||||
AODE();
|
||||
vector<string> graph(string title = "AODE");
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,34 +1,34 @@
|
||||
#include "BaseClassifier.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
|
||||
BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0) {}
|
||||
BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
|
||||
// Common tail of both fit() overloads. Expects X / y (and Xv / yv) to be set already:
// stores the metadata, builds the metrics helper, lets the subclass define the
// structure through train(), fits the network and marks the classifier as fitted.
BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
    // y is reshaped to a single column and appended to the right of X.
    auto labels_column = y.view({ y.size(0), 1 });
    dataset = torch::cat({ X, labels_column }, 1);

    this->features = features;
    this->className = className;
    this->states = states;
    checkFitParameters();

    // `states` is the parameter here (it shadows the member), so operator[]
    // inserts an empty entry into the caller's map when className is missing.
    const auto n_classes = states[className].size();
    metrics = Metrics(dataset, features, className, n_classes);

    train();
    model.fit(Xv, yv, features, className);
    fitted = true;
    return *this;
}
|
||||
// Fits the classifier from tensors. X is indexed as (sample, feature) and y holds
// one label per sample, matching the layout the vector overload builds.
//
// build() hands Xv / yv to model.fit(), so they are filled here from the tensors;
// previously this overload left them empty (or stale from an earlier fit).
// NOTE(review): checkFitParameters() is only partly visible in this change; if it
// also populates Xv / yv this conversion is redundant but harmless.
BaseClassifier& BaseClassifier::fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
    this->X = X;
    this->y = y;

    const int64_t n_samples = X.size(0);
    const int64_t n_features = X.size(1);
    // Xv is feature-major: one inner vector per column of X.
    Xv.assign(n_features, vector<int>(n_samples, 0));
    for (int64_t col = 0; col < n_features; ++col) {
        for (int64_t row = 0; row < n_samples; ++row) {
            Xv[col][row] = X[row][col].item<int>();
        }
    }

    const int64_t n_labels = y.size(0);
    yv.assign(n_labels, 0);
    for (int64_t row = 0; row < n_labels; ++row) {
        yv[row] = y[row].item<int>();
    }
    return build(features, className, states);
}
|
||||
// Fits the classifier from feature-major vectors: X[i] is the column of feature i
// and y holds one label per sample. The data is mirrored into the tensors X / y
// (sample-major, kInt64) and kept as Xv / yv for the network.
//
// Throws invalid_argument when X has no columns (X[0] used to be read unchecked).
BaseClassifier& BaseClassifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
    if (X.empty()) {
        throw invalid_argument("X must contain at least one feature column");
    }
    const auto n_samples = static_cast<int64_t>(X[0].size());
    const auto n_features = static_cast<int64_t>(X.size());
    this->X = torch::zeros({ n_samples, n_features }, kInt64);
    Xv = X;
    for (int i = 0; i < static_cast<int>(X.size()); ++i) {
        // Column i of the tensor receives feature i.
        this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
    }
    this->y = torch::tensor(y, kInt64);
    yv = y;
    return build(features, className, states);
}
|
||||
void BaseClassifier::checkFitParameters()
|
||||
@@ -51,25 +51,12 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
}
|
||||
vector<vector<int>> tensorToVector(const torch::Tensor& tensor)
|
||||
{
|
||||
// convert mxn tensor to nxm vector
|
||||
vector<vector<int>> result;
|
||||
auto tensor_accessor = tensor.accessor<int, 2>();
|
||||
|
||||
// Iterate over columns and rows of the tensor
|
||||
for (int j = 0; j < tensor.size(1); ++j) {
|
||||
vector<int> column;
|
||||
for (int i = 0; i < tensor.size(0); ++i) {
|
||||
column.push_back(tensor_accessor[i][j]);
|
||||
}
|
||||
result.push_back(column);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
Tensor BaseClassifier::predict(Tensor& X)
|
||||
{
|
||||
if (!fitted) {
|
||||
throw logic_error("Classifier has not been fitted");
|
||||
}
|
||||
auto m_ = X.size(0);
|
||||
auto n_ = X.size(1);
|
||||
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
|
||||
@@ -81,13 +68,60 @@ namespace bayesnet {
|
||||
auto ypred = torch::tensor(yp, torch::kInt64);
|
||||
return ypred;
|
||||
}
|
||||
// Predicts labels for feature-major input (X[i] is the column of feature i).
// Throws logic_error when called before fit().
//
// The input is copied once before being handed to the network. The earlier code
// sized a zero-filled buffer from X[0] (undefined behaviour for an empty X) and
// then overwrote every column with a second copy.
vector<int> BaseClassifier::predict(vector<vector<int>>& X)
{
    if (!fitted) {
        throw logic_error("Classifier has not been fitted");
    }
    vector<vector<int>> Xd(X);
    auto yp = model.predict(Xd);
    return yp;
}
|
||||
// Fraction of samples in X whose predicted label equals the matching entry of y.
// Throws logic_error when called before fit().
float BaseClassifier::score(Tensor& X, Tensor& y)
{
    if (fitted) {
        Tensor y_pred = predict(X);
        const float hits = (y_pred == y).sum().item<float>();
        return hits / y.size(0);
    }
    throw logic_error("Classifier has not been fitted");
}
|
||||
// Scores feature-major input (X[i] is the column of feature i) against labels y
// by delegating to the network's score(). Throws logic_error before fit().
//
// The input is copied once before being handed to the network. The earlier code
// sized a zero-filled buffer from X[0] (undefined behaviour for an empty X) and
// then overwrote every column with a second copy.
float BaseClassifier::score(vector<vector<int>>& X, vector<int>& y)
{
    if (!fitted) {
        throw logic_error("Classifier has not been fitted");
    }
    vector<vector<int>> Xd(X);
    return model.score(Xd, y);
}
|
||||
// Returns whatever the underlying network's show() produces.
vector<string> BaseClassifier::show()
{
    auto lines = model.show();
    return lines;
}
|
||||
void BaseClassifier::addNodes()
|
||||
{
|
||||
// Add all nodes to the network
|
||||
for (auto feature : features) {
|
||||
model.addNode(feature, states[feature].size());
|
||||
}
|
||||
model.addNode(className, states[className].size());
|
||||
}
|
||||
int BaseClassifier::getNumberOfNodes()
|
||||
{
|
||||
// Features does not include class
|
||||
return fitted ? model.getFeatures().size() + 1 : 0;
|
||||
}
|
||||
int BaseClassifier::getNumberOfEdges()
|
||||
{
|
||||
return fitted ? model.getEdges().size() : 0;
|
||||
}
|
||||
}
|
@@ -1,19 +1,25 @@
|
||||
#ifndef CLASSIFIERS_H
|
||||
#define CLASSIFIERS_H
|
||||
#include <torch/torch.h>
|
||||
#include "Network.h"
|
||||
#include "Metrics.hpp"
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
|
||||
namespace bayesnet {
|
||||
class BaseClassifier {
|
||||
private:
|
||||
bool fitted;
|
||||
BaseClassifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
|
||||
protected:
|
||||
Network model;
|
||||
int m, n; // m: number of samples, n: number of features
|
||||
Tensor X;
|
||||
vector<vector<int>> Xv;
|
||||
Tensor y;
|
||||
vector<int> yv;
|
||||
Tensor dataset;
|
||||
Metrics metrics;
|
||||
vector<string> features;
|
||||
string className;
|
||||
map<string, vector<int>> states;
|
||||
@@ -21,14 +27,17 @@ namespace bayesnet {
|
||||
virtual void train() = 0;
|
||||
public:
|
||||
BaseClassifier(Network model);
|
||||
Tensor& getX();
|
||||
vector<string>& getFeatures();
|
||||
string& getClassName();
|
||||
BaseClassifier& fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states);
|
||||
virtual ~BaseClassifier() = default;
|
||||
BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
|
||||
void addNodes();
|
||||
int getNumberOfNodes();
|
||||
int getNumberOfEdges();
|
||||
Tensor predict(Tensor& X);
|
||||
vector<int> predict(vector<vector<int>>& X);
|
||||
float score(Tensor& X, Tensor& y);
|
||||
float score(vector<vector<int>>& X, vector<int>& y);
|
||||
vector<string> show();
|
||||
virtual vector<string> graph(string title) = 0;
|
||||
};
|
||||
}
|
||||
#endif
|
112
bayesclass/cpp/Ensemble.cc
Normal file
112
bayesclass/cpp/Ensemble.cc
Normal file
@@ -0,0 +1,112 @@
|
||||
#include "Ensemble.h"
|
||||
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
|
||||
// Starts unfitted with no models. Initialisers are listed in the order the members
// are declared in Ensemble.h, which is the order they are actually initialised in.
Ensemble::Ensemble() : fitted(false), n_models(0), m(0), n(0), metrics(Metrics()) {}
|
||||
// Common fit pipeline. Expects X / y and Xv / yv to be set already: stores the
// metadata, builds the metrics helper, lets the subclass create the member models
// through train(), fits each of them and marks the ensemble as fitted.
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
    // y is reshaped to a single column and appended to the right of X.
    auto labels_column = y.view({ y.size(0), 1 });
    dataset = cat({ X, labels_column }, 1);

    this->features = features;
    this->className = className;
    this->states = states;

    // `states` is the parameter here (it shadows the member), so operator[]
    // inserts an empty entry into the caller's map when className is missing.
    const auto n_classes = states[className].size();
    metrics = Metrics(dataset, features, className, n_classes);

    // The subclass decides which models make up the ensemble.
    train();

    // Every member is trained on the same data.
    n_models = models.size();
    for (auto& member : models) {
        member->fit(Xv, yv, features, className, states);
    }
    fitted = true;
    return *this;
}
|
||||
// Fits the ensemble from feature-major vectors: X[i] is the column of feature i
// and y holds one label per sample. The data is mirrored into the tensors X / y
// (sample-major, kInt64) and kept as Xv / yv for the member models.
//
// Throws invalid_argument when X has no columns (X[0] used to be read unchecked).
// Also records m (samples) and n (features), which no visible code path set.
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
    if (X.empty()) {
        throw invalid_argument("X must contain at least one feature column");
    }
    n = static_cast<int>(X.size());
    m = static_cast<int>(X[0].size());
    this->X = torch::zeros({ static_cast<int64_t>(m), static_cast<int64_t>(n) }, kInt64);
    Xv = X;
    for (int i = 0; i < n; ++i) {
        // Column i of the tensor receives feature i.
        this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
    }
    this->y = torch::tensor(y, kInt64);
    yv = y;
    return build(features, className, states);
}
|
||||
// Tensor prediction: collects every member's prediction as one column of a
// (samples x models) kInt64 tensor and reduces each row with voting().
// Throws logic_error when called before fit().
Tensor Ensemble::predict(Tensor& X)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    const auto n_rows = X.size(0);
    Tensor y_pred = torch::zeros({ n_rows, n_models }, kInt64);
    int column = 0;
    while (column < n_models) {
        auto member_prediction = models[column]->predict(X);
        y_pred.index_put_({ "...", column }, member_prediction);
        ++column;
    }
    auto winners = voting(y_pred);
    return torch::tensor(winners);
}
|
||||
// Majority vote. y_pred is a 2-D kInt64 tensor with one row per sample and one
// column per model; the result holds, for every row, the label that occurs most
// often. Ties go to the smallest label.
//
// Changes from the previous version:
//  - a label outside [0, number of class states) throws out_of_range instead of
//    writing past the end of the vote counters;
//  - the winner is found with a single scan rather than a full argsort, which
//    also makes the tie-break well defined (std::sort is not stable).
vector<int> Ensemble::voting(Tensor& y_pred)
{
    auto predictions = y_pred.accessor<int64_t, 2>();
    const auto n_classes = static_cast<int64_t>(states[className].size());
    const int64_t n_rows = y_pred.size(0);
    const int64_t n_cols = y_pred.size(1);

    vector<int> y_pred_final;
    y_pred_final.reserve(n_rows);
    vector<int> votes;
    for (int64_t row = 0; row < n_rows; ++row) {
        votes.assign(n_classes, 0);
        for (int64_t col = 0; col < n_cols; ++col) {
            const int64_t label = predictions[row][col];
            if (label < 0 || label >= n_classes) {
                throw out_of_range("Predicted label is outside the known class states");
            }
            ++votes[label];
        }
        // Strict '>' keeps the first (smallest) label among equally voted ones.
        int winner = 0;
        for (int candidate = 1; candidate < n_classes; ++candidate) {
            if (votes[candidate] > votes[winner]) {
                winner = candidate;
            }
        }
        y_pred_final.push_back(winner);
    }
    return y_pred_final;
}
|
||||
// Vector prediction for feature-major input (X[i] is the column of feature i):
// every member predicts, the results become the columns of a (samples x models)
// kInt64 tensor, and voting() picks one label per sample.
//
// Throws logic_error before fit() and invalid_argument when X has no columns
// (X[0] used to be read unchecked). X is passed straight to the members: the
// BaseClassifier vector overload already works on its own copy, so the extra
// copy made here before was redundant.
vector<int> Ensemble::predict(vector<vector<int>>& X)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    if (X.empty()) {
        throw invalid_argument("X must contain at least one feature column");
    }
    long m_ = X[0].size();
    Tensor y_pred = torch::zeros({ m_, n_models }, kInt64);
    for (auto i = 0; i < n_models; ++i) {
        y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(X), kInt64));
    }
    return voting(y_pred);
}
|
||||
// Accuracy of the ensemble on feature-major input X against labels y: the share
// of samples whose voted label equals y[i].
//
// Throws logic_error before fit() and invalid_argument when y does not have one
// entry per predicted sample (y[i] used to be read without a length check).
// With zero samples the result is 0/0, i.e. NaN, as before.
float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    auto y_pred = predict(X);
    if (y_pred.size() != y.size()) {
        throw invalid_argument("X and y must describe the same number of samples");
    }
    int correct = 0;
    for (size_t i = 0; i < y_pred.size(); ++i) {
        if (y_pred[i] == y[i]) {
            ++correct;
        }
    }
    return static_cast<float>(static_cast<double>(correct) / y_pred.size());
}
|
||||
// Concatenates the show() output of the first n_models members, in model order.
vector<string> Ensemble::show()
{
    vector<string> lines;
    int index = 0;
    while (index < n_models) {
        const auto member_lines = models[index]->show();
        lines.insert(lines.end(), member_lines.begin(), member_lines.end());
        ++index;
    }
    return lines;
}
|
||||
// Concatenates the graph() output of the first n_models members; member i is
// titled "<title>_<i>".
vector<string> Ensemble::graph(string title)
{
    vector<string> lines;
    int index = 0;
    while (index < n_models) {
        const string member_title = title + "_" + to_string(index);
        const auto member_lines = models[index]->graph(member_title);
        lines.insert(lines.end(), member_lines.begin(), member_lines.end());
        ++index;
    }
    return lines;
}
|
||||
}
|
42
bayesclass/cpp/Ensemble.h
Normal file
42
bayesclass/cpp/Ensemble.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef ENSEMBLE_H
|
||||
#define ENSEMBLE_H
|
||||
#include <torch/torch.h>
|
||||
#include "BaseClassifier.h"
|
||||
#include "Metrics.hpp"
|
||||
#include "utils.h"
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
|
||||
namespace bayesnet {
|
||||
class Ensemble {
|
||||
private:
|
||||
bool fitted;
|
||||
long n_models;
|
||||
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
|
||||
protected:
|
||||
vector<unique_ptr<BaseClassifier>> models;
|
||||
int m, n; // m: number of samples, n: number of features
|
||||
Tensor X;
|
||||
vector<vector<int>> Xv;
|
||||
Tensor y;
|
||||
vector<int> yv;
|
||||
Tensor dataset;
|
||||
Metrics metrics;
|
||||
vector<string> features;
|
||||
string className;
|
||||
map<string, vector<int>> states;
|
||||
void virtual train() = 0;
|
||||
vector<int> voting(Tensor& y_pred);
|
||||
public:
|
||||
Ensemble();
|
||||
virtual ~Ensemble() = default;
|
||||
Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
|
||||
Tensor predict(Tensor& X);
|
||||
vector<int> predict(vector<vector<int>>& X);
|
||||
float score(Tensor& X, Tensor& y);
|
||||
float score(vector<vector<int>>& X, vector<int>& y);
|
||||
vector<string> show();
|
||||
vector<string> graph(string title);
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,17 +1,9 @@
|
||||
#include "KDB.h"
|
||||
#include "Metrics.hpp"
|
||||
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
vector<int> argsort(vector<float>& nums)
|
||||
{
|
||||
int n = nums.size();
|
||||
vector<int> indices(n);
|
||||
iota(indices.begin(), indices.end(), 0);
|
||||
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
|
||||
return indices;
|
||||
}
|
||||
|
||||
KDB::KDB(int k, float theta) : BaseClassifier(Network()), k(k), theta(theta) {}
|
||||
void KDB::train()
|
||||
{
|
||||
@@ -36,31 +28,23 @@ namespace bayesnet {
|
||||
*/
|
||||
// 1. For each feature Xi, compute mutual information, I(X;C),
|
||||
// where C is the class.
|
||||
cout << "Computing mutual information between features and class" << endl;
|
||||
auto n_classes = states[className].size();
|
||||
auto metrics = Metrics(dataset, features, className, n_classes);
|
||||
vector <float> mi;
|
||||
for (auto i = 0; i < features.size(); i++) {
|
||||
Tensor firstFeature = X.index({ "...", i });
|
||||
mi.push_back(metrics.mutualInformation(firstFeature, y));
|
||||
cout << "Mutual information between " << features[i] << " and " << className << " is " << mi[i] << endl;
|
||||
}
|
||||
// 2. Compute class conditional mutual information I(Xi;XjIC), f or each
|
||||
auto conditionalEdgeWeights = metrics.conditionalEdge();
|
||||
cout << "Conditional edge weights" << endl;
|
||||
cout << conditionalEdgeWeights << endl;
|
||||
// 3. Let the used variable list, S, be empty.
|
||||
vector<int> S;
|
||||
// 4. Let the DAG network being constructed, BN, begin with a single
|
||||
// class node, C.
|
||||
model.addNode(className, states[className].size());
|
||||
cout << "Adding node " << className << " to the network" << endl;
|
||||
// 5. Repeat until S includes all domain features
|
||||
// 5.1. Select feature Xmax which is not in S and has the largest value
|
||||
// I(Xmax;C).
|
||||
auto order = argsort(mi);
|
||||
for (auto idx : order) {
|
||||
cout << idx << " " << mi[idx] << endl;
|
||||
// 5.2. Add a node to BN representing Xmax.
|
||||
model.addNode(features[idx], states[features[idx]].size());
|
||||
// 5.3. Add an arc from C to Xmax in BN.
|
||||
@@ -76,8 +60,6 @@ namespace bayesnet {
|
||||
{
|
||||
auto n_edges = min(k, static_cast<int>(S.size()));
|
||||
auto cond_w = clone(weights);
|
||||
cout << "Conditional edge weights cloned for idx " << idx << endl;
|
||||
cout << cond_w << endl;
|
||||
bool exit_cond = k == 0;
|
||||
int num = 0;
|
||||
while (!exit_cond) {
|
||||
@@ -93,18 +75,16 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
cond_w.index_put_({ idx, max_minfo }, -1);
|
||||
cout << "Conditional edge weights cloned for idx " << idx << " After -1" << endl;
|
||||
cout << cond_w << endl;
|
||||
cout << "cond_w.index({ idx, '...'})" << endl;
|
||||
cout << cond_w.index({ idx, "..." }) << endl;
|
||||
auto candidates_mask = cond_w.index({ idx, "..." }).gt(theta);
|
||||
auto candidates = candidates_mask.nonzero();
|
||||
cout << "Candidates mask" << endl;
|
||||
cout << candidates_mask << endl;
|
||||
cout << "Candidates: " << endl;
|
||||
cout << candidates << endl;
|
||||
cout << "Candidates size: " << candidates.size(0) << endl;
|
||||
exit_cond = num == n_edges || candidates.size(0) == 0;
|
||||
}
|
||||
}
|
||||
// Returns the network's graph() output. When the caller keeps the default title
// "KDB", the hyper-parameters k and theta are appended to it first.
vector<string> KDB::graph(string title)
{
    const bool is_default_title = (title == "KDB");
    if (is_default_title) {
        const string suffix = " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
        title += suffix;
    }
    return model.graph(title);
}
|
||||
}
|
@@ -1,6 +1,7 @@
|
||||
#ifndef KDB_H
|
||||
#define KDB_H
|
||||
#include "BaseClassifier.h"
|
||||
#include "utils.h"
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
@@ -13,6 +14,7 @@ namespace bayesnet {
|
||||
void train() override;
|
||||
public:
|
||||
KDB(int k, float theta = 0.03);
|
||||
vector<string> graph(string name = "KDB") override;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,4 +1,5 @@
|
||||
#include "Metrics.hpp"
|
||||
#include "Mst.h"
|
||||
using namespace std;
|
||||
namespace bayesnet {
|
||||
Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
|
||||
@@ -116,4 +117,15 @@ namespace bayesnet {
|
||||
{
|
||||
return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
|
||||
}
|
||||
/*
|
||||
Compute the maximum spanning tree considering the weights as distances
|
||||
and the indices of the weights as nodes of this square matrix using
|
||||
Kruskal algorithm
|
||||
*/
|
||||
// Builds an MST helper from the feature names, the square weight matrix and the
// requested root, and returns the edge list it computes.
// The unused local result vector of the previous version has been removed.
vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root)
{
    auto mst = MST(features, weights, root);
    return mst.maximumSpanningTree();
}
|
||||
}
|
@@ -3,23 +3,26 @@
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
class Metrics {
|
||||
private:
|
||||
torch::Tensor samples;
|
||||
Tensor samples;
|
||||
vector<string> features;
|
||||
string className;
|
||||
int classNumStates;
|
||||
vector<pair<string, string>> doCombinations(const vector<string>&);
|
||||
double entropy(torch::Tensor&);
|
||||
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
|
||||
public:
|
||||
double mutualInformation(torch::Tensor&, torch::Tensor&);
|
||||
Metrics(torch::Tensor&, vector<string>&, string&, int);
|
||||
Metrics() = default;
|
||||
Metrics(Tensor&, vector<string>&, string&, int);
|
||||
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
|
||||
double entropy(Tensor&);
|
||||
double conditionalEntropy(Tensor&, Tensor&);
|
||||
double mutualInformation(Tensor&, Tensor&);
|
||||
vector<float> conditionalEdgeWeights();
|
||||
torch::Tensor conditionalEdge();
|
||||
Tensor conditionalEdge();
|
||||
vector<pair<string, string>> doCombinations(const vector<string>&);
|
||||
vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
|
||||
};
|
||||
}
|
||||
#endif
|
115
bayesclass/cpp/Mst.cc
Normal file
115
bayesclass/cpp/Mst.cc
Normal file
@@ -0,0 +1,115 @@
|
||||
#include "Mst.h"
|
||||
#include <vector>
|
||||
/*
|
||||
Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
|
||||
|
||||
*/
|
||||
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
// Creates a graph with V isolated nodes: every node starts as its own parent in
// the union-find forest and both edge lists start empty.
// The member V is now initialised; before, the parameter shadowed it and the
// member was left indeterminate.
Graph::Graph(int V) : V(V), parent(V)
{
    for (int i = 0; i < V; i++) {
        parent[i] = i;
    }
}
|
||||
void Graph::addEdge(int u, int v, float wt)
|
||||
{
|
||||
G.push_back({ wt, { u, v } });
|
||||
}
|
||||
int Graph::find_set(int i)
|
||||
{
|
||||
// If i is the parent of itself
|
||||
if (i == parent[i])
|
||||
return i;
|
||||
else
|
||||
//else recursively find the parent of i
|
||||
return find_set(parent[i]);
|
||||
}
|
||||
void Graph::union_set(int u, int v)
|
||||
{
|
||||
parent[u] = parent[v];
|
||||
}
|
||||
void Graph::kruskal_algorithm()
|
||||
{
|
||||
int i, uSt, vEd;
|
||||
// sort the edges ordered on decreasing weight
|
||||
sort(G.begin(), G.end(), [](auto& left, auto& right) {return left.first > right.first;});
|
||||
for (i = 0; i < G.size(); i++) {
|
||||
uSt = find_set(G[i].second.first);
|
||||
vEd = find_set(G[i].second.second);
|
||||
if (uSt != vEd) {
|
||||
T.push_back(G[i]); // add to mst vector
|
||||
union_set(uSt, vEd);
|
||||
}
|
||||
}
|
||||
}
|
||||
void Graph::display_mst()
|
||||
{
|
||||
cout << "Edge :" << " Weight" << endl;
|
||||
for (int i = 0; i < T.size(); i++) {
|
||||
cout << T[i].second.first << " - " << T[i].second.second << " : "
|
||||
<< T[i].first;
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Orients the edges in T away from root_original. Starting at the root, every
// edge touching the node being expanded is emitted as (node, neighbour), removed
// from T, and the neighbour is queued for expansion. Edges never reached from the
// root are appended at the end with their stored orientation.
// The pending set is an unordered_set, so the expansion order among several
// queued nodes follows that container's iteration order.
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
{
    std::vector<std::pair<int, int>> ordered;
    std::unordered_set<int> pending;
    pending.emplace(root_original);
    while (!pending.empty()) {
        const auto head = pending.begin();
        const int current = *head;
        pending.erase(head);

        std::vector<int> consumed; // positions in T used in this round, ascending
        for (int pos = 0; pos < static_cast<int>(T.size()); ++pos) {
            const int from = T[pos].second.first;
            const int to = T[pos].second.second;
            if (from != current && to != current) {
                continue;
            }
            consumed.push_back(pos);
            const int neighbour = (from == current) ? to : from;
            ordered.push_back({ current, neighbour });
            pending.emplace(neighbour);
        }
        // Erase from the highest position down so earlier positions stay valid.
        for (auto it = consumed.rbegin(); it != consumed.rend(); ++it) {
            T.erase(T.begin() + *it);
        }
    }
    // Whatever is left was not connected to the root.
    for (const auto& leftover : T) {
        ordered.push_back(leftover.second);
    }
    return ordered;
}
|
||||
|
||||
// Stores copies of the feature names, the weight tensor handle and the root index.
// Initialisers follow the declaration order in Mst.h (weights, features, root),
// which is the order the members are actually initialised in.
MST::MST(vector<string>& features, Tensor& weights, int root) : weights(weights), features(features), root(root) {}
|
||||
// Builds the complete undirected graph over the features, weighted by
// weights[i][j] for i < j, runs Kruskal with the heaviest edges first and returns
// the resulting tree edges oriented away from `root`.
//
// Changes from the previous version:
//  - the feature count is held as int: with an unsigned count, `count - 1`
//    wrapped around for an empty feature list and the outer loop never ended
//    sensibly;
//  - self-loops (i, i) are no longer added; Kruskal always rejected them.
vector<pair<int, int>> MST::maximumSpanningTree()
{
    const int num_features = static_cast<int>(features.size());
    Graph g(num_features);

    for (int i = 0; i < num_features - 1; ++i) {
        for (int j = i + 1; j < num_features; ++j) {
            g.addEdge(i, j, weights[i][j].item<float>());
        }
    }
    g.kruskal_algorithm();
    auto mst = g.get_mst();
    return reorder(mst, root);
}
|
||||
|
||||
}
|
35
bayesclass/cpp/Mst.h
Normal file
35
bayesclass/cpp/Mst.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef MST_H
|
||||
#define MST_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
class MST {
|
||||
private:
|
||||
Tensor weights;
|
||||
vector<string> features;
|
||||
int root;
|
||||
public:
|
||||
MST() = default;
|
||||
MST(vector<string>& features, Tensor& weights, int root);
|
||||
vector<pair<int, int>> maximumSpanningTree();
|
||||
};
|
||||
class Graph {
|
||||
private:
|
||||
int V; // number of nodes in graph
|
||||
vector <pair<float, pair<int, int>>> G; // vector for graph
|
||||
vector <pair<float, pair<int, int>>> T; // vector for mst
|
||||
vector<int> parent;
|
||||
public:
|
||||
Graph(int V);
|
||||
void addEdge(int u, int v, float wt);
|
||||
int find_set(int i);
|
||||
void union_set(int u, int v);
|
||||
void kruskal_algorithm();
|
||||
void display_mst();
|
||||
vector <pair<float, pair<int, int>>> get_mst() { return T; }
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -2,19 +2,13 @@
|
||||
#include <mutex>
|
||||
#include "Network.h"
|
||||
namespace bayesnet {
|
||||
Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8) {}
|
||||
Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT) {}
|
||||
Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT) {}
|
||||
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads())
|
||||
Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {}
|
||||
Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
|
||||
Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
|
||||
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted)
|
||||
{
|
||||
for (auto& pair : other.nodes) {
|
||||
nodes[pair.first] = new Node(*pair.second);
|
||||
}
|
||||
}
|
||||
Network::~Network()
|
||||
{
|
||||
for (auto& pair : nodes) {
|
||||
delete pair.second;
|
||||
nodes[pair.first] = make_unique<Node>(*pair.second);
|
||||
}
|
||||
}
|
||||
float Network::getmaxThreads()
|
||||
@@ -27,12 +21,15 @@ namespace bayesnet {
|
||||
}
|
||||
void Network::addNode(string name, int numStates)
|
||||
{
|
||||
if (find(features.begin(), features.end(), name) == features.end()) {
|
||||
features.push_back(name);
|
||||
}
|
||||
if (nodes.find(name) != nodes.end()) {
|
||||
// if node exists update its number of states
|
||||
nodes[name]->setNumStates(numStates);
|
||||
return;
|
||||
}
|
||||
nodes[name] = new Node(name, numStates);
|
||||
nodes[name] = make_unique<Node>(name, numStates);
|
||||
}
|
||||
vector<string> Network::getFeatures()
|
||||
{
|
||||
@@ -45,7 +42,7 @@ namespace bayesnet {
|
||||
int Network::getStates()
|
||||
{
|
||||
int result = 0;
|
||||
for (auto node : nodes) {
|
||||
for (auto& node : nodes) {
|
||||
result += node.second->getNumStates();
|
||||
}
|
||||
return result;
|
||||
@@ -79,20 +76,20 @@ namespace bayesnet {
|
||||
throw invalid_argument("Child node " + child + " does not exist");
|
||||
}
|
||||
// Temporarily add edge to check for cycles
|
||||
nodes[parent]->addChild(nodes[child]);
|
||||
nodes[child]->addParent(nodes[parent]);
|
||||
nodes[parent]->addChild(nodes[child].get());
|
||||
nodes[child]->addParent(nodes[parent].get());
|
||||
unordered_set<string> visited;
|
||||
unordered_set<string> recStack;
|
||||
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
|
||||
{
|
||||
// remove problematic edge
|
||||
nodes[parent]->removeChild(nodes[child]);
|
||||
nodes[child]->removeParent(nodes[parent]);
|
||||
nodes[parent]->removeChild(nodes[child].get());
|
||||
nodes[child]->removeParent(nodes[parent].get());
|
||||
throw invalid_argument("Adding this edge forms a cycle in the graph.");
|
||||
}
|
||||
|
||||
}
|
||||
map<string, Node*>& Network::getNodes()
|
||||
map<string, std::unique_ptr<Node>>& Network::getNodes()
|
||||
{
|
||||
return nodes;
|
||||
}
|
||||
@@ -140,9 +137,8 @@ namespace bayesnet {
|
||||
lock.unlock();
|
||||
|
||||
pair.second->computeCPT(dataset, laplaceSmoothing);
|
||||
|
||||
lock.lock();
|
||||
nodes[pair.first] = pair.second;
|
||||
nodes[pair.first] = std::move(pair.second);
|
||||
lock.unlock();
|
||||
}
|
||||
lock_guard<mutex> lock(mtx);
|
||||
@@ -155,10 +151,14 @@ namespace bayesnet {
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
fitted = true;
|
||||
}
|
||||
|
||||
vector<int> Network::predict(const vector<vector<int>>& tsamples)
|
||||
{
|
||||
if (!fitted) {
|
||||
throw logic_error("You must call fit() before calling predict()");
|
||||
}
|
||||
vector<int> predictions;
|
||||
vector<int> sample;
|
||||
for (int row = 0; row < tsamples[0].size(); ++row) {
|
||||
@@ -176,6 +176,9 @@ namespace bayesnet {
|
||||
}
|
||||
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
|
||||
{
|
||||
if (!fitted) {
|
||||
throw logic_error("You must call fit() before calling predict_proba()");
|
||||
}
|
||||
vector<vector<double>> predictions;
|
||||
vector<int> sample;
|
||||
for (int row = 0; row < tsamples[0].size(); ++row) {
|
||||
@@ -215,7 +218,7 @@ namespace bayesnet {
|
||||
double Network::computeFactor(map<string, int>& completeEvidence)
|
||||
{
|
||||
double result = 1.0;
|
||||
for (auto node : getNodes()) {
|
||||
for (auto& node : getNodes()) {
|
||||
result *= node.second->getFactorValue(completeEvidence);
|
||||
}
|
||||
return result;
|
||||
@@ -249,7 +252,7 @@ namespace bayesnet {
|
||||
{
|
||||
vector<string> result;
|
||||
// Draw the network
|
||||
for (auto node : nodes) {
|
||||
for (auto& node : nodes) {
|
||||
string line = node.first + " -> ";
|
||||
for (auto child : node.second->getChildren()) {
|
||||
line += child->getName() + ", ";
|
||||
@@ -258,5 +261,31 @@ namespace bayesnet {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// Renders the network as Graphviz "dot" text, returned as a list of string fragments.
// The first fragment is the digraph header (label "BayesNet <title>" plus font/layout
// attributes), followed by whatever every node's Node::graph(className) returns, and
// finally the closing "}\n".
// @param title text appended to "BayesNet " inside the graph label
// @return the fragments in emission order
vector<string> Network::graph(string title)
|
||||
{
|
||||
auto output = vector<string>();
|
||||
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
|
||||
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
|
||||
// prefix/suffix are C string literals; concatenating with the std::string title yields a std::string
string header = prefix + title + suffix;
|
||||
output.push_back(header);
|
||||
// className is forwarded so each node can tell whether it is the class node
for (auto& node : nodes) {
|
||||
auto result = node.second->graph(className);
|
||||
output.insert(output.end(), result.begin(), result.end());
|
||||
}
|
||||
output.push_back("}\n");
|
||||
return output;
|
||||
}
|
||||
// Lists every directed edge of the network as a (parent name, child name) pair.
// Edges are collected by walking `nodes` and, for each node, its getChildren() list,
// so the result follows the iteration order of those two containers.
// @return one pair per parent -> child relation; empty when no node has children
vector<pair<string, string>> Network::getEdges()
|
||||
{
|
||||
auto edges = vector<pair<string, string>>();
|
||||
for (const auto& node : nodes) {
|
||||
// head is the map key of the parent node (copied once per node)
auto head = node.first;
|
||||
for (const auto& child : node.second->getChildren()) {
|
||||
auto tail = child->getName();
|
||||
edges.push_back({ head, tail });
|
||||
}
|
||||
}
|
||||
return edges;
|
||||
}
|
||||
|
||||
}
|
@@ -7,8 +7,9 @@
|
||||
namespace bayesnet {
|
||||
class Network {
|
||||
private:
|
||||
map<string, Node*> nodes;
|
||||
map<string, std::unique_ptr<Node>> nodes;
|
||||
map<string, vector<int>> dataset;
|
||||
bool fitted;
|
||||
float maxThreads;
|
||||
int classNumStates;
|
||||
vector<string> features;
|
||||
@@ -28,14 +29,14 @@ namespace bayesnet {
|
||||
Network(float, int);
|
||||
Network(float);
|
||||
Network(Network&);
|
||||
~Network();
|
||||
torch::Tensor& getSamples();
|
||||
float getmaxThreads();
|
||||
void addNode(string, int);
|
||||
void addEdge(const string, const string);
|
||||
map<string, Node*>& getNodes();
|
||||
map<string, std::unique_ptr<Node>>& getNodes();
|
||||
vector<string> getFeatures();
|
||||
int getStates();
|
||||
vector<pair<string, string>> getEdges();
|
||||
int getClassNumStates();
|
||||
string getClassName();
|
||||
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
|
||||
@@ -45,6 +46,7 @@ namespace bayesnet {
|
||||
vector<vector<double>> predict_proba(const vector<vector<int>>&);
|
||||
double score(const vector<vector<int>>&, const vector<int>&);
|
||||
vector<string> show();
|
||||
vector<string> graph(string title); // Returns a vector of strings representing the graph in graphviz format
|
||||
inline string version() { return "0.1.0"; }
|
||||
};
|
||||
}
|
@@ -6,12 +6,10 @@ namespace bayesnet {
|
||||
: name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
|
||||
{
|
||||
}
|
||||
|
||||
string Node::getName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
void Node::addParent(Node* parent)
|
||||
{
|
||||
parents.push_back(parent);
|
||||
@@ -111,4 +109,14 @@ namespace bayesnet {
|
||||
}
|
||||
return cpTable.index({ coordinates }).item<float>();
|
||||
}
|
||||
// Produces the Graphviz fragments for this node: one node declaration followed by
// one "name -> child" edge per entry in `children`.
// @param className name of the class node; when it equals this node's name the
//        declaration gets the red font / light-blue filled style
// @return the declaration fragment followed by the edge fragments
vector<string> Node::graph(string className)
|
||||
{
|
||||
auto output = vector<string>();
|
||||
// Extra attributes are only emitted for the class node; every other node gets an empty suffix
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
|
||||
output.push_back(name + " [shape=circle" + suffix + "] \n");
|
||||
// NOTE(review): edge fragments carry no trailing "\n" while the declaration above does —
// confirm that callers join the fragments with a separator before feeding them to Graphviz
for (auto& child : children) {
|
||||
output.push_back(name + " -> " + child->getName());
|
||||
}
|
||||
return output;
|
||||
}
|
||||
}
|
@@ -29,6 +29,7 @@ namespace bayesnet {
|
||||
int getNumStates() const;
|
||||
void setNumStates(int);
|
||||
unsigned minFill();
|
||||
vector<string> graph(string clasName); // Returns a vector of strings representing the graph in graphviz format
|
||||
float getFactorValue(map<string, int>&);
|
||||
};
|
||||
}
|
25
bayesclass/cpp/SPODE.cc
Normal file
25
bayesclass/cpp/SPODE.cc
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "SPODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
// Initialises the base classifier with a default-constructed Network and stores `root`,
// which train() later uses as an index into `features`.
SPODE::SPODE(int root) : BaseClassifier(Network()), root(root) {}
|
||||
|
||||
void SPODE::train()
|
||||
{
|
||||
// 0. Add all nodes to the model
|
||||
addNodes();
|
||||
// 1. Add edges from the class node to all other nodes
|
||||
// 2. Add edges from the root node to all other nodes
|
||||
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
|
||||
model.addEdge(className, features[i]);
|
||||
if (i != root) {
|
||||
model.addEdge(features[root], features[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Returns the Graphviz representation of the underlying model; simply forwards
// `name` as the title to model.graph().
vector<string> SPODE::graph(string name )
|
||||
{
|
||||
return model.graph(name);
|
||||
}
|
||||
|
||||
}
|
15
bayesclass/cpp/SPODE.h
Normal file
15
bayesclass/cpp/SPODE.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef SPODE_H
|
||||
#define SPODE_H
|
||||
#include "BaseClassifier.h"
|
||||
namespace bayesnet {
|
||||
class SPODE : public BaseClassifier {
|
||||
private:
|
||||
int root;
|
||||
protected:
|
||||
void train() override;
|
||||
public:
|
||||
SPODE(int root);
|
||||
vector<string> graph(string name = "SPODE") override;
|
||||
};
|
||||
}
|
||||
#endif
|
42
bayesclass/cpp/TAN.cc
Normal file
42
bayesclass/cpp/TAN.cc
Normal file
@@ -0,0 +1,42 @@
|
||||
#include "TAN.h"
|
||||
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
|
||||
// Initialises the base classifier with a default-constructed Network; the structure is built later by train().
TAN::TAN() : BaseClassifier(Network()) {}
|
||||
|
||||
void TAN::train()
|
||||
{
|
||||
// 0. Add all nodes to the model
|
||||
addNodes();
|
||||
// 1. Compute mutual information between each feature and the class and set the root node
|
||||
// as the highest mutual information with the class
|
||||
auto mi = vector <pair<int, float >>();
|
||||
Tensor class_dataset = dataset.index({ "...", -1 });
|
||||
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
|
||||
Tensor feature_dataset = dataset.index({ "...", i });
|
||||
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset);
|
||||
mi.push_back({ i, mi_value });
|
||||
}
|
||||
sort(mi.begin(), mi.end(), [](auto& left, auto& right) {return left.second < right.second;});
|
||||
auto root = mi[mi.size() - 1].first;
|
||||
// 2. Compute mutual information between each feature and the class
|
||||
auto weights = metrics.conditionalEdge();
|
||||
// 3. Compute the maximum spanning tree
|
||||
auto mst = metrics.maximumSpanningTree(features, weights, root);
|
||||
// 4. Add edges from the maximum spanning tree to the model
|
||||
for (auto i = 0; i < mst.size(); ++i) {
|
||||
auto [from, to] = mst[i];
|
||||
model.addEdge(features[from], features[to]);
|
||||
}
|
||||
// 5. Add edges from the class to all features
|
||||
for (auto feature : features) {
|
||||
model.addEdge(className, feature);
|
||||
}
|
||||
}
|
||||
// Returns the Graphviz representation of the underlying model; simply forwards
// `title` to model.graph().
vector<string> TAN::graph(string title)
|
||||
{
|
||||
return model.graph(title);
|
||||
}
|
||||
}
|
16
bayesclass/cpp/TAN.h
Normal file
16
bayesclass/cpp/TAN.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef TAN_H
|
||||
#define TAN_H
|
||||
#include "BaseClassifier.h"
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
// TAN classifier built on top of BaseClassifier. It adds no state of its own:
// it overrides train() to build the structure and graph() to render it.
class TAN : public BaseClassifier {
|
||||
// (no private members)
private:
|
||||
protected:
|
||||
// Builds the network structure in the inherited model
void train() override;
|
||||
public:
|
||||
TAN();
|
||||
// Graphviz representation of the model; `name` is used as the graph title
vector<string> graph(string name = "TAN") override;
|
||||
};
|
||||
}
|
||||
#endif
|
31
bayesclass/cpp/utils.cc
Normal file
31
bayesclass/cpp/utils.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
#include <torch/torch.h>
#include <algorithm>
#include <numeric>
#include <vector>
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
/// Returns the indices of `nums` ordered so that the values they refer to are
/// in descending order (the index of the largest value comes first).
///
/// Equal values keep their original relative order, so the result is fully
/// deterministic; with an unstable sort the order of tied entries would be
/// unspecified and could differ between standard library implementations.
///
/// @param nums values to rank; not modified
/// @return a permutation of 0..nums.size()-1; empty when `nums` is empty
std::vector<int> argsort(std::vector<float>& nums)
{
    // Size the index vector directly from the container to avoid narrowing size() into an int
    std::vector<int> indices(nums.size());
    std::iota(indices.begin(), indices.end(), 0);
    std::stable_sort(indices.begin(), indices.end(),
        [&nums](int lhs, int rhs) { return nums[lhs] > nums[rhs]; });
    return indices;
}
|
||||
vector<vector<int>> tensorToVector(const Tensor& tensor)
|
||||
{
|
||||
// convert mxn tensor to nxm vector
|
||||
vector<vector<int>> result;
|
||||
auto tensor_accessor = tensor.accessor<int, 2>();
|
||||
|
||||
// Iterate over columns and rows of the tensor
|
||||
for (int j = 0; j < tensor.size(1); ++j) {
|
||||
vector<int> column;
|
||||
for (int i = 0; i < tensor.size(0); ++i) {
|
||||
column.push_back(tensor_accessor[i][j]);
|
||||
}
|
||||
result.push_back(column);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
8
bayesclass/cpp/utils.h
Normal file
8
bayesclass/cpp/utils.h
Normal file
@@ -0,0 +1,8 @@
|
||||
namespace bayesnet {
|
||||
using namespace std;
|
||||
using namespace torch;
|
||||
vector<int> argsort(vector<float>& nums);
|
||||
|
||||
vector<vector<int>> tensorToVector(const Tensor& tensor);
|
||||
|
||||
}
|
16
setup.py
16
setup.py
@@ -18,7 +18,7 @@ setup(
|
||||
name="bayesclass.cppSelectFeatures",
|
||||
sources=[
|
||||
"bayesclass/cSelectFeatures.pyx",
|
||||
"bayesclass/FeatureSelect.cpp",
|
||||
"bayesclass/cpp/FeatureSelect.cpp",
|
||||
],
|
||||
language="c++",
|
||||
include_dirs=["bayesclass"],
|
||||
@@ -30,9 +30,17 @@ setup(
|
||||
name="bayesclass.BayesNet",
|
||||
sources=[
|
||||
"bayesclass/BayesNetwork.pyx",
|
||||
"bayesclass/Network.cc",
|
||||
"bayesclass/Node.cc",
|
||||
"bayesclass/Metrics.cc",
|
||||
"bayesclass/cpp/Network.cc",
|
||||
"bayesclass/cpp/Node.cc",
|
||||
"bayesclass/cpp/Metrics.cc",
|
||||
"bayesclass/cpp/utils.cc",
|
||||
"bayesclass/cpp/Mst.cc",
|
||||
"bayesclass/cpp/BaseClassifier.cc",
|
||||
"bayesclass/cpp/Ensemble.cc",
|
||||
"bayesclass/cpp/TAN.cc",
|
||||
"bayesclass/cpp/KDB.cc",
|
||||
"bayesclass/cpp/SPODE.cc",
|
||||
"bayesclass/cpp/AODE.cc",
|
||||
],
|
||||
include_dirs=include_paths(),
|
||||
),
|
||||
|
Reference in New Issue
Block a user