#ifndef TEST_UTILS_H #define TEST_UTILS_H #include #include #include #include #include #include "ArffFiles.h" #include "CPPFImdlp.h" using namespace std; bool file_exists(const std::string& name); pair, map> discretize(vector& X, mdlp::labels_t& y, vector features); vector discretizeDataset(vector& X, mdlp::labels_t& y); tuple>, vector, vector, string, map>> loadFile(const string& name); tuple, string, map>> loadDataset(const string& name, bool class_last, bool discretize_dataset); class RawDatasets { public: RawDatasets(const string& file_name, bool discretize) { // Xt can be either discretized or not tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize); // Xv is always discretized tie(Xv, yv, featuresv, classNamev, statesv) = loadFile(file_name); auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1); dataset = torch::cat({ Xt, yresized }, 0); nSamples = dataset.size(1); weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble); classNumStates = discretize ? statest.at(classNamet).size() : 0; } torch::Tensor Xt, yt, dataset, weights; vector> Xv; vector yv; vector featurest, featuresv; map> statest, statesv; string classNamet, classNamev; int nSamples, classNumStates; double epsilon = 1e-5; }; #endif //TEST_UTILS_H