#include "TestUtils.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <numeric>

using namespace std;
using namespace torch;

// NOTE(review): the template arguments in this file (e.g. vector<mdlp::samples_t>,
// map<string, vector<int>>) were not visible in the reviewed text and have been
// reconstructed from how each value is used; confirm them against TestUtils.h.

/// Locations of the fixtures used by the tests.
class Paths {
public:
    /// Relative directory (with trailing slash) that is prepended to dataset names.
    static string datasets()
    {
        return "../data/";
    }
};

/// Discretizes every entry of X against the labels y with mdlp::CPPFImdlp.
///
/// @param X        samples to discretize; X[i] is paired with features[i]
/// @param y        labels handed to the discretizer's fit()
/// @param features feature names, indexed like X
/// @return the discretized vectors (same order as X) and, keyed by feature
///         name, the largest discretized value plus one
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
{
    vector<mdlp::labels_t> Xd;
    map<string, int> maxes;
    auto fimdlp = mdlp::CPPFImdlp();
    Xd.reserve(X.size());
    for (size_t i = 0; i < X.size(); ++i) {
        fimdlp.fit(X[i], y);
        mdlp::labels_t& xd = fimdlp.transform(X[i]);
        maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
        // push_back copies xd, so the stored result does not alias the discretizer.
        Xd.push_back(xd);
    }
    return { Xd, maxes };
}

/// Discretizes every entry of X against the labels y with mdlp::CPPFImdlp.
///
/// @param X samples to discretize
/// @param y labels handed to the discretizer's fit()
/// @return the discretized vectors, in the same order as X
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
    vector<mdlp::labels_t> Xd;
    auto fimdlp = mdlp::CPPFImdlp();
    Xd.reserve(X.size());
    for (size_t i = 0; i < X.size(); ++i) {
        fimdlp.fit(X[i], y);
        mdlp::labels_t& xd = fimdlp.transform(X[i]);
        Xd.push_back(xd);
    }
    return Xd;
}

/// Reports whether the file `name` can be opened for reading.
bool file_exists(const string& name)
{
    if (FILE* file = fopen(name.c_str(), "r")) {
        fclose(file);
        return true;
    }
    return false;
}

/// Loads `Paths::datasets() + name + ".arff"` through ArffFiles and returns it as tensors.
///
/// @param name               dataset name, without directory or extension
/// @param class_last         forwarded to ArffFiles::load
/// @param discretize_dataset when true the samples are discretized with
///                           discretizeDataset() and `states` is populated
/// @return a tuple of
///         - the samples as a tensor of shape (X[0].size(), X.size()), where
///           column i holds X[i]; kInt32 when discretized, kFloat32 otherwise
///         - the labels as a kInt32 tensor
///         - the feature names
///         - the class name
///         - the states map: for every feature and for the class, the values
///           0..max; left empty when discretize_dataset is false
tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& name, bool class_last, bool discretize_dataset)
{
    auto handler = ArffFiles();
    handler.load(Paths::datasets() + name + ".arff", class_last);
    // Get Dataset X, y
    vector<mdlp::samples_t>& X = handler.getX();
    mdlp::labels_t& y = handler.getY();
    // Get className & Features
    auto className = handler.getClassName();
    vector<string> features;
    auto attributes = handler.getAttributes();
    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
    // The tensor index below takes a signed integer, so keep the counter an int.
    const int n_features = static_cast<int>(features.size());
    Tensor Xd;
    auto states = map<string, vector<int>>();
    if (discretize_dataset) {
        auto Xr = discretizeDataset(X, y);
        Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt32);
        for (int i = 0; i < n_features; ++i) {
            // Fill the stored vector through a reference: filling a copy would
            // leave every feature's states as zeros.
            auto& item = states[features[i]];
            item = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
            iota(begin(item), end(item), 0);
            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
        }
        auto& classStates = states[className];
        classStates = vector<int>(*max_element(y.begin(), y.end()) + 1);
        iota(begin(classStates), end(classStates), 0);
    } else {
        Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
        for (int i = 0; i < n_features; ++i) {
            Xd.index_put_({ "...", i }, torch::tensor(X[i]));
        }
    }
    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
}

/// Loads `Paths::datasets() + name + ".arff"` through ArffFiles and discretizes it.
///
/// @param name dataset name, without directory or extension
/// @return a tuple of the discretized samples, the labels, the feature names,
///         the class name, and a states map holding, for every feature and
///         for the class, a zero-filled vector whose length is the largest
///         value plus one
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
{
    auto handler = ArffFiles();
    handler.load(Paths::datasets() + name + ".arff");
    // Get Dataset X, y
    vector<mdlp::samples_t>& X = handler.getX();
    mdlp::labels_t& y = handler.getY();
    // Get className & Features
    auto className = handler.getClassName();
    vector<string> features;
    auto attributes = handler.getAttributes();
    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
    // Discretize Dataset
    vector<mdlp::labels_t> Xd;
    map<string, int> maxes;
    tie(Xd, maxes) = discretize(X, y, features);
    maxes[className] = *max_element(y.begin(), y.end()) + 1;
    map<string, vector<int>> states;
    for (const auto& feature : features) {
        // Value-initialised: maxes[feature] zeros.
        states[feature] = vector<int>(maxes[feature]);
    }
    states[className] = vector<int>(maxes[className]);
    return { Xd, y, features, className, states };
}