BayesNet/tests/utils.cc

40 lines
1.4 KiB
C++
Raw Normal View History

2023-07-18 11:44:08 +00:00
#include "utils.h"
/**
 * @brief Discretizes every entry of X with a CPPFImdlp model fitted against y.
 *
 * For each index i the discretizer is fitted on (X[i], y) and X[i] is then
 * transformed. The transformed labels are appended to the first element of the
 * result, and "largest label + 1" is recorded under features[i] in the second.
 *
 * @param X        Sample vectors to discretize; X[i] is paired with features[i].
 * @param y        Labels passed to the discretizer's fit().
 * @param features Keys for the returned map; must hold at least X.size() names.
 * @return Pair of (discretized vectors in the order of X,
 *         map from feature name to largest discretized label + 1, or 0 when
 *         the transform produced no labels).
 * @throws std::out_of_range if features has fewer entries than X.
 */
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t> &X, mdlp::labels_t &y, vector<string> features) {
    vector<mdlp::labels_t> Xd;
    Xd.reserve(X.size());
    map<string, int> maxes;
    auto fimdlp = mdlp::CPPFImdlp();
    for (size_t i = 0; i < X.size(); ++i) {
        fimdlp.fit(X[i], y);
        const mdlp::labels_t &xd = fimdlp.transform(X[i]);
        // max_element returns end() for an empty range; dereferencing it is
        // undefined, so an empty result is reported as zero states instead.
        // at() turns a too-short features list into an exception.
        maxes[features.at(i)] = xd.empty() ? 0 : *max_element(xd.begin(), xd.end()) + 1;
        Xd.push_back(xd);
    }
    return {Xd, maxes};
}
/**
 * @brief Loads the ARFF file PATH + name + ".arff" and returns it discretized.
 *
 * The file is read through ArffFiles. Feature names are taken from the first
 * member of each entry returned by getAttributes(), and the samples are
 * discretized with discretize().
 *
 * @param name Dataset name without directory or the ".arff" extension.
 * @return Tuple of:
 *         - the discretized sample vectors,
 *         - the labels as returned by the handler's getY(),
 *         - the feature names,
 *         - the class name,
 *         - a map from every feature name and the class name to a zero-filled
 *           vector whose length is that variable's largest label + 1
 *           (length 0 for the class when there are no labels).
 */
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>>
loadFile(string name) {
    auto handler = ArffFiles();
    handler.load(PATH + name + ".arff");
    // Get Dataset X, y
    vector<mdlp::samples_t> &X = handler.getX();
    mdlp::labels_t &y = handler.getY();
    // Get className & Features
    auto className = handler.getClassName();
    vector<string> features;
    // Bind by const reference so each attribute entry is not copied per iteration.
    for (const auto &feature : handler.getAttributes()) {
        features.push_back(feature.first);
    }
    // Discretize Dataset
    vector<mdlp::labels_t> Xd;
    map<string, int> maxes;
    tie(Xd, maxes) = discretize(X, y, features);
    // max_element on an empty range returns end(); guard before dereferencing.
    maxes[className] = y.empty() ? 0 : *max_element(y.begin(), y.end()) + 1;
    // Each state vector is value-initialized (all zeros); its length is the
    // number recorded in maxes for that variable.
    map<string, vector<int>> states;
    for (const auto &feature : features) {
        states[feature] = vector<int>(maxes[feature]);
    }
    states[className] = vector<int>(maxes[className]);
    return {Xd, y, features, className, states};
}