Begin implementing KDB
This commit is contained in:
parent
c5386d66fc
commit
8b0aa5ccfb
@ -7,6 +7,7 @@
|
|||||||
#include "Network.h"
|
#include "Network.h"
|
||||||
#include "Metrics.hpp"
|
#include "Metrics.hpp"
|
||||||
#include "CPPFImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
|
#include "KDB.h"
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -247,5 +248,14 @@ int main(int argc, char** argv)
|
|||||||
long m2 = features.size() + 1;
|
long m2 = features.size() + 1;
|
||||||
auto matrix2 = torch::from_blob(conditional2.data(), { m, m });
|
auto matrix2 = torch::from_blob(conditional2.data(), { m, m });
|
||||||
cout << matrix2 << endl;
|
cout << matrix2 << endl;
|
||||||
|
cout << "****************** KDB ******************" << endl;
|
||||||
|
map<string, vector<int>> states;
|
||||||
|
for (auto feature : features) {
|
||||||
|
states[feature] = vector<int>(maxes[feature]);
|
||||||
|
}
|
||||||
|
states[className] = vector<int>(maxes[className]);
|
||||||
|
auto kdb = bayesnet::KDB(1);
|
||||||
|
kdb.fit(Xd, y, features, className, states);
|
||||||
|
cout << "****************** KDB ******************" << endl;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
90
src/BaseClassifier.cc
Normal file
90
src/BaseClassifier.cc
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
#include "BaseClassifier.h"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
using namespace std;
|
||||||
|
using namespace torch;
|
||||||
|
|
||||||
|
// Keeps a copy of the supplied network. The sample count (m) and the feature
// count (n) start at zero; checkFitParameters() fills them in during fit().
BaseClassifier::BaseClassifier(Network model)
    : model(model),
      m(0),
      n(0)
{
}
|
||||||
|
/// Common tail of both fit() overloads: stores the fit metadata, validates it
/// against the already-assigned X / y members, assembles the combined dataset
/// tensor and runs the subclass-specific train().
///
/// @param features  feature names; checkFitParameters() requires one per column of X
/// @param className name of the class variable; must be a key of states
/// @param states    map from variable name to its states; must hold an entry
///                  for every feature and for className
/// @return *this, so calls can be chained
/// @throws invalid_argument when checkFitParameters() rejects the inputs
BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
    this->features = features;
    this->className = className;
    this->states = states;
    cout << "Checking fit parameters" << endl;
    // Validate first so that inconsistent shapes are reported as
    // invalid_argument instead of failing inside torch::cat.
    checkFitParameters();
    // Append y as the last column of the dataset. -1 lets torch infer the
    // number of samples instead of assuming a fixed row count.
    dataset = torch::cat({ X, y.view({ -1, 1 }) }, 1);
    train();
    return *this;
}
|
||||||
|
// Tensor overload of fit(): keeps the given X and y tensors as the training
// data and hands the remaining arguments over to build().
BaseClassifier& BaseClassifier::fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
    // The parameters shadow the members of the same name, hence this->.
    this->y = y;
    this->X = X;
    BaseClassifier& fitted = build(features, className, states);
    return fitted;
}
|
||||||
|
/// std::vector overload of fit(): converts the feature-major input into the
/// (samples x features) kInt64 tensor stored in this->X, converts y to a
/// kInt64 tensor and delegates to build().
///
/// @param X         one inner vector per feature, each holding that feature's
///                  value for every sample
/// @param y         class label of every sample
/// @param features  feature names, forwarded to build()
/// @param className name of the class variable, forwarded to build()
/// @param states    states of every variable, forwarded to build()
/// @return *this
/// @throws invalid_argument if X is empty, or when build() rejects the inputs
BaseClassifier& BaseClassifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
    // X[0] below would be undefined behaviour on an empty container.
    if (X.empty()) {
        throw invalid_argument("X must contain at least one feature");
    }
    const auto n_samples = static_cast<int64_t>(X[0].size());
    const auto n_features = static_cast<int64_t>(X.size());
    this->X = torch::zeros({ n_samples, n_features }, kInt64);
    for (int64_t i = 0; i < n_features; ++i) {
        // Feature i becomes column i of the tensor.
        this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
    }
    this->y = torch::tensor(y, kInt64);
    return build(features, className, states);
}
|
||||||
|
void BaseClassifier::checkFitParameters()
|
||||||
|
{
|
||||||
|
auto sizes = X.sizes();
|
||||||
|
m = sizes[0];
|
||||||
|
n = sizes[1];
|
||||||
|
if (m != y.size(0)) {
|
||||||
|
throw invalid_argument("X and y must have the same number of samples");
|
||||||
|
}
|
||||||
|
if (n != features.size()) {
|
||||||
|
throw invalid_argument("X and features must have the same number of features");
|
||||||
|
}
|
||||||
|
if (states.find(className) == states.end()) {
|
||||||
|
throw invalid_argument("className not found in states");
|
||||||
|
}
|
||||||
|
for (auto feature : features) {
|
||||||
|
if (states.find(feature) == states.end()) {
|
||||||
|
throw invalid_argument("feature [" + feature + "] not found in states");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Converts an (m x n) tensor into n vectors of m ints, i.e. one vector per
/// tensor column.
///
/// @param tensor 2-dimensional tensor; its values are converted to 32-bit ints
/// @return result[j][i] == tensor[i][j]
vector<vector<int>> tensorToVector(const torch::Tensor& tensor)
{
    // accessor<int, 2>() only accepts 32-bit int tensors, while this file
    // creates kInt64 tensors, so convert first (a no-op when already kInt32).
    const auto as_int32 = tensor.to(torch::kInt32);
    const auto values = as_int32.accessor<int, 2>();
    const auto rows = as_int32.size(0);
    const auto cols = as_int32.size(1);

    // Allocate the transposed shape once instead of growing with push_back.
    vector<vector<int>> result(cols, vector<int>(rows));
    for (int64_t j = 0; j < cols; ++j) {
        for (int64_t i = 0; i < rows; ++i) {
            result[j][i] = values[i][j];
        }
    }

    return result;
}
|
||||||
|
/// Predicts the class of every row of X with the wrapped network.
///
/// @param X (samples x features) tensor; values are converted to 32-bit ints
///          before being passed to the network
/// @return kInt64 tensor built from the values returned by model.predict()
Tensor BaseClassifier::predict(Tensor& X)
{
    const auto m_ = X.size(0);
    const auto n_ = X.size(1);
    // The network is fed feature-major data: one vector per column of X.
    vector<vector<int>> Xd(n_, vector<int>(m_, 0));
    for (int64_t i = 0; i < n_; ++i) {
        // A column of a row-major tensor is a strided view, so its elements
        // are not adjacent in memory. Materialise it as a contiguous int32
        // buffer before copying through the raw pointer; this also makes
        // data_ptr<int>() valid for kInt64 input.
        const auto column = X.index({ "...", i }).to(torch::kInt32).contiguous();
        const int* first = column.data_ptr<int>();
        Xd[i].assign(first, first + m_);
    }
    auto yp = model.predict(Xd);
    return torch::tensor(yp, torch::kInt64);
}
|
||||||
|
// Accuracy of the classifier on (X, y): the number of positions where
// predict(X) equals y, divided by the first dimension of y.
float BaseClassifier::score(Tensor& X, Tensor& y)
{
    const Tensor predicted = predict(X);
    const Tensor matches = (predicted == y);
    const float hits = matches.sum().item<float>();
    return hits / y.size(0);
}
|
||||||
|
}
|
39
src/BaseClassifier.h
Normal file
39
src/BaseClassifier.h
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
#ifndef CLASSIFIERS_H
// Without this define the #ifndef above never becomes false and the header is
// not protected against multiple inclusion.
#define CLASSIFIERS_H
#include <map>
#include <string>
#include <vector>
#include <torch/torch.h>
#include "Network.h"
// NOTE(review): using-directives at header scope leak into every file that
// includes this header. They are kept so that includers see the same names as
// before.
using namespace std;
using namespace torch;

namespace bayesnet {
    /// Abstract base for classifiers that wrap a Network. Subclasses supply
    /// the structure/parameter learning by implementing train().
    class BaseClassifier {
    private:
        /// Shared tail of both fit() overloads: stores the metadata, validates
        /// it, builds `dataset` and calls train().
        BaseClassifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
    protected:
        Network model;                   ///< network used by predict()
        int m, n;                        ///< m: number of samples, n: number of features
        Tensor X;                        ///< training features, one row per sample
        Tensor y;                        ///< training class labels
        Tensor dataset;                  ///< X with y appended as the last column
        vector<string> features;         ///< feature names, one per column of X
        string className;                ///< name of the class variable
        map<string, vector<int>> states; ///< states of every variable, keyed by name
        /// Sets m and n from X and throws invalid_argument when X, y, features,
        /// className and states are inconsistent.
        void checkFitParameters();
        /// Subclass hook invoked by fit() once the inputs have been validated.
        virtual void train() = 0;
    public:
        BaseClassifier(Network model);
        /// Polymorphic base: allow safe destruction through a base pointer.
        virtual ~BaseClassifier() = default;
        // NOTE(review): no definition of the three getters below is visible in
        // BaseClassifier.cc; confirm they are defined before calling them.
        Tensor& getX();
        vector<string>& getFeatures();
        string& getClassName();
        /// Fits the classifier from tensors (X: samples x features).
        BaseClassifier& fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states);
        /// Fits the classifier from feature-major vectors (one inner vector per feature).
        BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
        /// Predicts the class of every row of X.
        Tensor predict(Tensor& X);
        /// Fraction of rows of X whose prediction equals y.
        float score(Tensor& X, Tensor& y);
    };

}
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,2 +1,2 @@
|
|||||||
add_library(BayesNet Network.cc Node.cc Metrics.cc)
|
add_library(BayesNet Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc)
|
||||||
target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
|
target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
|
44
src/KDB.cc
Normal file
44
src/KDB.cc
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#include "KDB.h"
|
||||||
|
#include "Metrics.hpp"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
using namespace std;
|
||||||
|
using namespace torch;
|
||||||
|
// Creates a KDB classifier backed by a default-constructed Network and
// remembers k.
KDB::KDB(int k)
    : BaseClassifier(Network()),
      k(k)
{
}
|
||||||
|
void KDB::train()
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
1. For each feature Xi, compute mutual information, I(X;C),
|
||||||
|
where C is the class.
|
||||||
|
2. Compute class conditional mutual information I(Xi;XjIC), f or each
|
||||||
|
pair of features Xi and Xj, where i#j.
|
||||||
|
3. Let the used variable list, S, be empty.
|
||||||
|
4. Let the DAG network being constructed, BN, begin with a single
|
||||||
|
class node, C.
|
||||||
|
5. Repeat until S includes all domain features
|
||||||
|
5.1. Select feature Xmax which is not in S and has the largest value
|
||||||
|
I(Xmax;C).
|
||||||
|
5.2. Add a node to BN representing Xmax.
|
||||||
|
5.3. Add an arc from C to Xmax in BN.
|
||||||
|
5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with
|
||||||
|
the highest value for I(Xmax;X,jC).
|
||||||
|
5.5. Add Xmax to S.
|
||||||
|
Compute the conditional probabilility infered by the structure of BN by
|
||||||
|
using counts from DB, and output BN.
|
||||||
|
*/
|
||||||
|
// 1. For each feature Xi, compute mutual information, I(X;C),
|
||||||
|
// where C is the class.
|
||||||
|
cout << "Computing mutual information between features and class" << endl;
|
||||||
|
auto n_classes = states[className].size();
|
||||||
|
auto metrics = Metrics(dataset, features, className, n_classes);
|
||||||
|
for (auto i = 0; i < features.size(); i++) {
|
||||||
|
Tensor firstFeature = X.index({ "...", i });
|
||||||
|
Tensor secondFeature = y;
|
||||||
|
double mi = metrics.mutualInformation(firstFeature, y);
|
||||||
|
cout << "Mutual information between " << features[i] << " and " << className << " is " << mi << endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
16
src/KDB.h
Normal file
16
src/KDB.h
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#ifndef KDB_H
|
||||||
|
#define KDB_H
|
||||||
|
#include "BaseClassifier.h"
|
||||||
|
namespace bayesnet {
|
||||||
|
using namespace std;
|
||||||
|
using namespace torch;
|
||||||
|
class KDB : public BaseClassifier {
|
||||||
|
private:
|
||||||
|
int k;
|
||||||
|
protected:
|
||||||
|
void train();
|
||||||
|
public:
|
||||||
|
KDB(int k);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@ -14,8 +14,8 @@ namespace bayesnet {
|
|||||||
vector<pair<string, string>> doCombinations(const vector<string>&);
|
vector<pair<string, string>> doCombinations(const vector<string>&);
|
||||||
double entropy(torch::Tensor&);
|
double entropy(torch::Tensor&);
|
||||||
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
|
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
|
||||||
double mutualInformation(torch::Tensor&, torch::Tensor&);
|
|
||||||
public:
|
public:
|
||||||
|
double mutualInformation(torch::Tensor&, torch::Tensor&);
|
||||||
Metrics(torch::Tensor&, vector<string>&, string&, int);
|
Metrics(torch::Tensor&, vector<string>&, string&, int);
|
||||||
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
|
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
|
||||||
vector<float> conditionalEdgeWeights();
|
vector<float> conditionalEdgeWeights();
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
class Network {
|
class Network {
|
||||||
private:
|
private:
|
||||||
|
Loading…
Reference in New Issue
Block a user