Ensemble Experiment, Folding, Classifiers and Network together

parent 644b6c9be0
commit 0c226371cc
@@ -7,8 +7,10 @@ namespace bayesnet {
    class BaseClassifier {
    public:
        virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
+        virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
        vector<int> virtual predict(vector<vector<int>>& X) = 0;
        float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
+        float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
        vector<string> virtual show() = 0;
        vector<string> virtual graph(string title = "") = 0;
        virtual ~BaseClassifier() = default;
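(Not part of the diff.) A minimal usage sketch of the two fit() overloads declared above. The concrete subclass name bayesnet::TAN, its header name, and the data layout (one inner vector per feature, inferred from the tensor reshaping in Classifier::fit further down) are assumptions, not taken from this commit.

    #include <map>
    #include <string>
    #include <vector>
    #include "TAN.h"   // hypothetical header for one BaseClassifier implementation

    void exampleFit()
    {
        // two features, three samples, already discretized to integer states
        std::vector<std::vector<int>> X = { { 0, 1, 1 }, { 1, 0, 2 } };
        std::vector<int> y = { 0, 1, 1 };
        std::vector<std::string> features = { "f1", "f2" };
        std::string className = "class";
        std::map<std::string, std::vector<int>> states = {
            { "f1", { 0, 1 } }, { "f2", { 0, 1, 2 } }, { "class", { 0, 1 } }
        };
        bayesnet::TAN clf;                               // assumed concrete classifier
        clf.fit(X, y, features, className, states);      // vector overload
        float acc = clf.score(X, y);                     // fraction of correct predictions
    }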
@@ -15,10 +15,23 @@ namespace bayesnet {
        auto n_classes = states[className].size();
        metrics = Metrics(dataset, features, className, n_classes);
        train();
+        if (Xv == vector<vector<int>>()) {
+            model.fit(X, y, features, className);
+        } else {
            model.fit(Xv, yv, features, className);
+        }
        fitted = true;
        return *this;
    }
+    Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
+    {
+        this->X = X;
+        this->y = y;
+        Xv = vector<vector<int>>();
+        yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
+        return build(features, className, states);
+    }
+
    Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
    {
        this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
@@ -29,15 +29,16 @@ namespace bayesnet {
    public:
        Classifier(Network model);
        virtual ~Classifier() = default;
-        Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
+        Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
+        Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
        void addNodes();
        int getNumberOfNodes();
        int getNumberOfEdges();
        Tensor predict(Tensor& X);
-        vector<int> predict(vector<vector<int>>& X);
-        float score(Tensor& X, Tensor& y);
-        float score(vector<vector<int>>& X, vector<int>& y);
-        vector<string> show();
+        vector<int> predict(vector<vector<int>>& X) override;
+        float score(Tensor& X, Tensor& y) override;
+        float score(vector<vector<int>>& X, vector<int>& y) override;
+        vector<string> show() override;
    };
}
#endif
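(Not part of the diff.) The new Tensor-based fit() above copies labels with vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)), which assumes y is a contiguous tensor of dtype kInt32. A sketch of the same idiom with those assumptions made explicit:

    #include <torch/torch.h>
    #include <vector>

    // Copy a 1-D label tensor into std::vector<int>, forcing the dtype and
    // memory layout that data_ptr<int>() relies on.
    std::vector<int> labelsToVector(const torch::Tensor& y)
    {
        auto yc = y.to(torch::kInt32).contiguous();
        return std::vector<int>(yc.data_ptr<int>(), yc.data_ptr<int>() + yc.numel());
    }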
@@ -22,6 +22,14 @@ namespace bayesnet {
        fitted = true;
        return *this;
    }
+    Ensemble& Ensemble::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
+    {
+        this->X = X;
+        this->y = y;
+        Xv = vector<vector<int>>();
+        yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
+        return build(features, className, states);
+    }
    Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
    {
        this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
@@ -75,6 +83,20 @@ namespace bayesnet {
        }
        return voting(y_pred);
    }
+    float Ensemble::score(Tensor& X, Tensor& y)
+    {
+        if (!fitted) {
+            throw logic_error("Ensemble has not been fitted");
+        }
+        auto y_pred = predict(X);
+        int correct = 0;
+        for (int i = 0; i < y_pred.size(0); ++i) {
+            if (y_pred[i].item<int>() == y[i].item<int>()) {
+                correct++;
+            }
+        }
+        return (double)correct / y_pred.size(0);
+    }
    float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
    {
        if (!fitted) {
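(Not part of the diff.) The element-wise loop in the new Ensemble::score(Tensor&, Tensor&) above could also be expressed with tensor operations; an equivalent sketch, assuming two 1-D integer tensors of equal length:

    #include <torch/torch.h>

    // Accuracy as the mean of the element-wise equality mask.
    float tensorAccuracy(const torch::Tensor& y_pred, const torch::Tensor& y)
    {
        return torch::eq(y_pred, y).to(torch::kFloat32).mean().item<float>();
    }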
@@ -31,9 +31,10 @@ namespace bayesnet {
        Ensemble();
        virtual ~Ensemble() = default;
        Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
+        Ensemble& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
        Tensor predict(Tensor& X);
        vector<int> predict(vector<vector<int>>& X) override;
-        float score(Tensor& X, Tensor& y);
+        float score(Tensor& X, Tensor& y) override;
        float score(vector<vector<int>>& X, vector<int>& y) override;
        vector<string> show() override;
        vector<string> graph(string title) override;
@@ -1,6 +1,7 @@
#include <thread>
+#include <mutex>
#include "Network.h"
#include "bayesnetUtils.h"
namespace bayesnet {
    Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {}
    Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
@@ -8,7 +9,7 @@ namespace bayesnet {
    Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted)
    {
        for (auto& pair : other.nodes) {
-            nodes[pair.first] = make_unique<Node>(*pair.second);
+            nodes[pair.first] = std::make_unique<Node>(*pair.second);
        }
    }
    float Network::getmaxThreads()
@@ -29,7 +30,7 @@ namespace bayesnet {
            nodes[name]->setNumStates(numStates);
            return;
        }
-        nodes[name] = make_unique<Node>(name, numStates);
+        nodes[name] = std::make_unique<Node>(name, numStates);
    }
    vector<string> Network::getFeatures()
    {
@@ -93,6 +94,10 @@ namespace bayesnet {
    {
        return nodes;
    }
+    void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& featureNames, const string& className)
+    {
+        this->fit(tensorToVector(X), vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)), featureNames, className);
+    }
    void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
    {
        features = featureNames;
@@ -40,6 +40,7 @@ namespace bayesnet {
        int getClassNumStates();
        string getClassName();
        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
+        void fit(torch::Tensor&, torch::Tensor&, const vector<string>&, const string&);
        vector<int> predict(const vector<vector<int>>&);
        //Computes the conditional edge weight of variable index u and v conditioned on class_node
        torch::Tensor conditionalEdgeWeight();
@@ -17,16 +17,19 @@

using namespace std;

-pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, int k)
+pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
{
+    auto k = fold->getNumberOfFolds();
    float accuracy = 0.0;
    for (int i = 0; i < k; i++) {
        auto [train, test] = fold->getFold(i);
-        auto X_train = X.indices{ train };
-        auto y_train = y.indices{ train };
-        auto X_test = X.indices{ test };
-        auto y_test = y.indices{ test };
-        model->fit(X_train, y_train);
+        auto train_t = torch::tensor(train);
+        auto test_t = torch::tensor(test);
+        auto X_train = X.index({ train_t });
+        auto y_train = y.index({ train_t });
+        auto X_test = X.index({ test_t });
+        auto y_test = y.index({ test_t });
+        model->fit(X_train, y_train, features, className, states);
        auto acc = model->score(X_test, y_test);
        accuracy += acc;
    }
@@ -97,9 +100,12 @@ int main(int argc, char** argv)
    /*
    * Begin Processing
    */
-    auto [X, y, features] = loadDataset(file_name, discretize_dataset);
+    auto [X, y, features, className] = loadDataset(file_name, discretize_dataset, class_last);
+    auto states = map<string, vector<int>>();
    if (discretize_dataset) {
-        auto [discretized, maxes] = discretize(X, y, features);
+        auto [Xd, maxes] = discretizeTorch(X, y, features);
+        states = get_states(Xd, y, features, className);
+        X = Xd;
    }
    auto fold = StratifiedKFold(5, y, -1);
    auto classifiers = map<string, bayesnet::BaseClassifier*>({
@@ -108,7 +114,7 @@ int main(int argc, char** argv)
        }
    );
    bayesnet::BaseClassifier* model = classifiers[model_name];
-    auto results = cross_validation(model, X, y, fold, 5);
+    auto results = cross_validation(&fold, model, X, y, features, className, states);
    cout << "Accuracy: " << results.first << endl;
    return 0;
}
@@ -28,10 +28,21 @@ pair<vector<int>, vector<int>> KFold::getFold(int nFold)
    }
    return { train, test };
}
+StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
+{
+    n = y.numel();
+    this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
+    build();
+}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
    : Fold(k, y.size(), seed)
{
    this->y = y;
    n = y.size();
    build();
}
void StratifiedKFold::build()
{
    stratified_indices = vector<vector<int>>(k);
    int fold_size = n / k;
    int remainder = n % k;
@@ -1,5 +1,6 @@
#ifndef FOLDING_H
#define FOLDING_H
+#include <torch/torch.h>
#include <vector>
using namespace std;

@@ -12,6 +13,7 @@ public:
    Fold(int k, int n, int seed = -1) : k(k), n(n), seed(seed) {}
    virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
    virtual ~Fold() = default;
+    int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
@@ -21,9 +23,13 @@ public:
    pair<vector<int>, vector<int>> getFold(int nFold);
};
class StratifiedKFold : public Fold {
private:
    vector<int> y;
    vector<vector<int>> stratified_indices;
    void build();
public:
    StratifiedKFold(int k, const vector<int>& y, int seed = -1);
+    StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
    pair<vector<int>, vector<int>> getFold(int nFold);
};
#endif
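(Not part of the diff.) A usage sketch for the new torch::Tensor constructor of StratifiedKFold declared above; it assumes y holds kInt32 labels, since the constructor reads them through data_ptr<int>().

    #include <torch/torch.h>
    #include "Folding.h"

    void exampleFolding()
    {
        auto y = torch::tensor({ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, torch::kInt32);
        auto fold = StratifiedKFold(5, y, 17);
        for (int i = 0; i < fold.getNumberOfFolds(); ++i) {
            auto [train, test] = fold.getFold(i);      // index vectors into y
            auto train_t = torch::tensor(train);       // ready for tensor indexing
            auto test_t = torch::tensor(test);
        }
    }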
@@ -15,6 +15,22 @@ pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t
    }
    return { Xd, maxes };
}
+pair<Tensor, map<string, int>> discretizeTorch(Tensor& X, Tensor& y, vector<string> features)
+{
+    map<string, int> maxes;
+    auto fimdlp = mdlp::CPPFImdlp();
+    auto Xd = torch::zeros_like(X, torch::kInt64);
+    auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
+    for (int i = 0; i < X.size(1); i++) {
+        auto xv = vector<float>(X.select(1, i).data_ptr<float>(), X.select(1, i).data_ptr<float>() + X.size(0));
+        fimdlp.fit(xv, yv);
+        auto xdv = fimdlp.transform(xv);
+        auto xd = torch::tensor(xdv, torch::kInt64);
+        maxes[features[i]] = xd.max().item<int>() + 1;
+        Xd.index_put_({ "...", i }, xd);
+    }
+    return { Xd, maxes };
+}

vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
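(Not part of the diff.) A usage sketch for discretizeTorch above. The dtypes and layout are assumptions forced by the implementation: X is taken to be a 2-D kFloat32 tensor (samples x features) and y a kInt32 label tensor, because columns and labels are read through data_ptr<float>() and data_ptr<int>(); the header name is also assumed.

    #include <string>
    #include <vector>
    #include <torch/torch.h>
    #include "platformUtils.h"   // assumed header declaring discretizeTorch

    void exampleDiscretize()
    {
        auto X = torch::rand({ 100, 4 }, torch::kFloat32);
        auto y = torch::randint(0, 2, { 100 }, torch::kInt32);
        std::vector<std::string> features = { "f1", "f2", "f3", "f4" };
        auto [Xd, maxes] = discretizeTorch(X, y, features);
        // Xd keeps X's shape but holds kInt64 bin indices;
        // maxes[f] is the number of bins produced for feature f.
    }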
@@ -38,10 +54,10 @@ bool file_exists(const std::string& name)
    }
}

-tuple < Tensor, Tensor, vector<string>> loadDataset(string name, bool discretize)
+tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last)
{
    auto handler = ArffFiles();
-    handler.load(PATH + static_cast<string>(name) + ".arff");
+    handler.load(PATH + static_cast<string>(name) + ".arff", class_last);
    // Get Dataset X, y
    vector<mdlp::samples_t>& X = handler.getX();
    mdlp::labels_t& y = handler.getY();
@@ -64,20 +80,20 @@ tuple < Tensor, Tensor, vector<string>> loadDataset(string name, bool discretize
            Xd.index_put_({ "...", i }, torch::tensor(X[i], torch::kFloat64));
        }
    }
-    return { Xd, torch::tensor(y, torch::kInt64), features };
+    return { Xd, torch::tensor(y, torch::kInt64), features, className };
}

-pair <map<string, int>, map<string, vector<int>>> discretize_info(Tensor& X, Tensor& y, vector<string> features, string className)
+map<string, vector<int>> get_states(Tensor& X, Tensor& y, vector<string> features, string className)
{
-    map<string, int> maxes;
+    int max;
    map<string, vector<int>> states;
    for (int i = 0; i < X.size(1); i++) {
-        maxes[features[i]] = X.select(1, i).max().item<int>() + 1;
-        states[features[i]] = vector<int>(maxes[features[i]]);
+        max = X.select(1, i).max().item<int>() + 1;
+        states[features[i]] = vector<int>(max);
    }
-    maxes[className] = y.max().item<int>() + 1;
-    states[className] = vector<int>(maxes[className]);
-    return { maxes, states };
+    max = y.max().item<int>() + 1;
+    states[className] = vector<int>(max);
+    return states;
}

tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name)
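(Not part of the diff.) A small worked example of what the new get_states() above produces. Note that each entry is built with vector<int>(max), i.e. a zero-filled vector whose length (not its contents) encodes the number of states; integer-valued tensors are assumed, and the include name is an assumption.

    #include <torch/torch.h>
    #include "platformUtils.h"   // assumed header declaring get_states

    void exampleStates()
    {
        auto X = torch::tensor({ { 0, 1 }, { 2, 0 }, { 1, 1 } }, torch::kInt32);  // 3 samples x 2 features
        auto y = torch::tensor({ 0, 1, 0 }, torch::kInt32);
        auto states = get_states(X, y, { "f1", "f2" }, "class");
        // states["f1"].size() == 3, states["f2"].size() == 2, states["class"].size() == 2;
        // every vector is zero-filled, only its length matters downstream.
    }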
@@ -12,7 +12,8 @@ const string PATH = "../../data/";

bool file_exists(const std::string& name);
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
+pair<torch::Tensor, map<string, int>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string> features);
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name);
-tuple<torch::Tensor, torch::Tensor, vector<string>> loadDataset(string name, bool discretize);
-pair <map<string, int>, map<string, vector<int>>> discretize_info(torch::Tensor& X, torch::Tensor& y);
+tuple<torch::Tensor, torch::Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last);
+map<string, vector<int>> get_states(torch::Tensor& X, torch::Tensor& y, vector<string> features, string className);
#endif //PLATFORM_UTILS_H