Ensemble Experiment, Folding, Classifiers and Network together

2023-07-23 14:10:28 +02:00
parent 644b6c9be0
commit 0c226371cc
12 changed files with 116 additions and 31 deletions

View File

@@ -17,16 +17,19 @@
using namespace std;
pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, int k)
pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
{
auto k = fold->getNumberOfFolds();
float accuracy = 0.0;
for (int i = 0; i < k; i++) {
auto [train, test] = fold->getFold(i);
auto X_train = X.indices{ train };
auto y_train = y.indices{ train };
auto X_test = X.indices{ test };
auto y_test = y.indices{ test };
model->fit(X_train, y_train);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ test_t });
auto y_test = y.index({ test_t });
model->fit(X_train, y_train, features, className, states);
auto acc = model->score(X_test, y_test);
accuracy += acc;
}
@@ -97,9 +100,12 @@ int main(int argc, char** argv)
/*
* Begin Processing
*/
auto [X, y, features] = loadDataset(file_name, discretize_dataset);
auto [X, y, features, className] = loadDataset(file_name, discretize_dataset, class_last);
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto [discretized, maxes] = discretize(X, y, features);
auto [Xd, maxes] = discretizeTorch(X, y, features);
states = get_states(Xd, y, features, className);
X = Xd;
}
auto fold = StratifiedKFold(5, y, -1);
auto classifiers = map<string, bayesnet::BaseClassifier*>({
@@ -108,7 +114,7 @@ int main(int argc, char** argv)
}
);
bayesnet::BaseClassifier* model = classifiers[model_name];
auto results = cross_validation(model, X, y, fold, 5);
auto results = cross_validation(&fold, model, X, y, features, className, states);
cout << "Accuracy: " << results.first << endl;
return 0;
}
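
For context, a hedged sketch of how the classifiers map and the reworked cross_validation call fit together. It assumes the code lives in the same translation unit as cross_validation above (which already has the torch includes and using namespace std), and the classifier names and headers are illustrative only; any bayesnet::BaseClassifier from the library can be registered the same way.

#include "Folding.h"
#include "TAN.h"   // illustrative classifiers; any bayesnet::BaseClassifier works
#include "KDB.h"

float evaluate(string model_name, Tensor& X, Tensor& y,
    vector<string>& features, string& className, map<string, vector<int>>& states)
{
    // Register candidate models by name, mirroring the classifiers map in main()
    auto classifiers = map<string, bayesnet::BaseClassifier*>({
        { "TAN", new bayesnet::TAN() },
        { "KDB", new bayesnet::KDB(2) },
        });
    // The tensor overload of StratifiedKFold reads y through data_ptr<int>(),
    // so y is assumed to hold 32-bit integer labels here
    auto fold = StratifiedKFold(5, y, -1);
    auto results = cross_validation(&fold, classifiers[model_name], X, y, features, className, states);
    return results.first;   // accuracy aggregated over the k folds, as printed by main()
}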

View File

@@ -28,10 +28,21 @@ pair<vector<int>, vector<int>> KFold::getFold(int nFold)
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
int remainder = n % k;
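
As a usage note for the two constructors above, a minimal sketch assuming Folding.h from this commit. The torch::Tensor overload copies the labels through data_ptr<int>(), so it expects a 32-bit integer tensor (convert with .to(torch::kInt32) when the labels are stored as kInt64); both overloads then behave identically through the Fold interface.

#include <vector>
#include <torch/torch.h>
#include "Folding.h"

void demo_folds()
{
    // vector<int> overload: three classes, three samples each
    std::vector<int> y_vec = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    auto fold_v = StratifiedKFold(3, y_vec, 17);

    // torch::Tensor overload; the constructor reads the labels with data_ptr<int>(),
    // which requires a torch::kInt32 tensor
    auto y_t = torch::tensor(y_vec, torch::kInt32);
    auto fold_t = StratifiedKFold(3, y_t, 17);

    for (int i = 0; i < fold_v.getNumberOfFolds(); ++i) {
        auto [train, test] = fold_v.getFold(i);
        // train/test are index vectors into y, with class proportions kept per fold
    }
}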

View File

@@ -1,5 +1,6 @@
#ifndef FOLDING_H
#define FOLDING_H
#include <torch/torch.h>
#include <vector>
using namespace std;
@@ -12,6 +13,7 @@ public:
Fold(int k, int n, int seed = -1) : k(k), n(n), seed(seed) {}
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
@@ -21,9 +23,13 @@ public:
pair<vector<int>, vector<int>> getFold(int nFold);
};
class StratifiedKFold : public Fold {
private:
vector<int> y;
vector<vector<int>> stratified_indices;
void build();
public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold);
};
#endif

View File

@@ -15,6 +15,22 @@ pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t
}
return { Xd, maxes };
}
pair<Tensor, map<string, int>> discretizeTorch(Tensor& X, Tensor& y, vector<string> features)
{
map<string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
auto Xd = torch::zeros_like(X, torch::kInt64);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
for (int i = 0; i < X.size(1); i++) {
auto xv = vector<float>(X.select(1, i).data_ptr<float>(), X.select(1, i).data_ptr<float>() + X.size(0));
fimdlp.fit(xv, yv);
auto xdv = fimdlp.transform(xv);
auto xd = torch::tensor(xdv, torch::kInt64);
maxes[features[i]] = xd.max().item<int>() + 1;
Xd.index_put_({ "...", i }, xd);
}
return { Xd, maxes };
}
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
@@ -38,10 +54,10 @@ bool file_exists(const std::string& name)
}
}
tuple < Tensor, Tensor, vector<string>> loadDataset(string name, bool discretize)
tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last)
{
auto handler = ArffFiles();
handler.load(PATH + static_cast<string>(name) + ".arff");
handler.load(PATH + static_cast<string>(name) + ".arff", class_last);
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
@@ -64,20 +80,20 @@ tuple < Tensor, Tensor, vector<string>> loadDataset(string name, bool discretize
Xd.index_put_({ "...", i }, torch::tensor(X[i], torch::kFloat64));
}
}
return { Xd, torch::tensor(y, torch::kInt64), features };
return { Xd, torch::tensor(y, torch::kInt64), features, className };
}
pair <map<string, int>, map<string, vector<int>>> discretize_info(Tensor& X, Tensor& y, vector<string> features, string className)
map<string, vector<int>> get_states(Tensor& X, Tensor& y, vector<string> features, string className)
{
map<string, int> maxes;
int max;
map<string, vector<int>> states;
for (int i = 0; i < X.size(1); i++) {
maxes[features[i]] = X.select(1, i).max().item<int>() + 1;
states[features[i]] = vector<int>(maxes[features[i]]);
max = X.select(1, i).max().item<int>() + 1;
states[features[i]] = vector<int>(max);
}
maxes[className] = y.max().item<int>() + 1;
states[className] = vector<int>(maxes[className]);
return { maxes, states };
max = y.max().item<int>() + 1;
states[className] = vector<int>(max);
return states;
}
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name)
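
To illustrate the new discretizeTorch/get_states pair, a small sketch under stated assumptions: the header name platformUtils.h is inferred from the PLATFORM_UTILS_H guard below, the data is synthetic, and the tensors are created as kFloat32/kInt32 because discretizeTorch reads the feature columns through data_ptr<float>() and the labels through data_ptr<int>().

#include <torch/torch.h>
#include <vector>
#include <string>
#include "platformUtils.h"   // assumed header name (see the PLATFORM_UTILS_H guard below)

void demo_states()
{
    // Two toy float features and integer class labels
    auto X = torch::rand({ 20, 2 }, torch::kFloat32);
    auto y = torch::randint(0, 3, { 20 }, torch::kInt32);
    std::vector<std::string> features = { "f0", "f1" };
    std::string className = "class";

    auto [Xd, maxes] = discretizeTorch(X, y, features);   // Xd holds one mdlp bin index per cell (kInt64)
    auto states = get_states(Xd, y, features, className);
    // states[f].size() equals the number of discrete values of feature f (column max + 1),
    // and states[className].size() equals y.max() + 1: the map the classifiers' fit() receives
}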

View File

@@ -12,7 +12,8 @@ const string PATH = "../../data/";
bool file_exists(const std::string& name);
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
pair<torch::Tensor, map<string, int>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string> features);
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name);
tuple<torch::Tensor, torch::Tensor, vector<string>> loadDataset(string name, bool discretize);
pair <map<string, int>, map<string, vector<int>>> discretize_info(torch::Tensor& X, torch::Tensor& y);
tuple<torch::Tensor, torch::Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last);
map<string, vector<int>> get_states(torch::Tensor& X, torch::Tensor& y, vector<string> features, string className);
#endif //PLATFORM_UTILS_H