Files
BayesNet/tests/TestUtils.h

97 lines
3.9 KiB
C++

// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef TEST_UTILS_H
#define TEST_UTILS_H
#include <torch/torch.h>
#include <string>
#include <vector>
#include <map>
#include <tuple>
#include <ArffFiles.hpp>
#include <fimdlp/CPPFImdlp.h>
#include <folding.hpp>
#include <bayesnet/network/Network.h>
class RawDatasets {
public:
RawDatasets(const std::string& file_name, bool discretize_, int num_samples_ = 0, bool shuffle_ = false, bool class_last = true, bool debug = false);
torch::Tensor Xt, yt, dataset, weights;
torch::Tensor X_train, y_train, X_test, y_test;
std::vector<vector<int>> Xv;
std::vector<int> yv;
std::vector<double> weightsv;
std::vector<string> features;
std::string className;
map<std::string, std::vector<int>> states;
//catalog holds the mapping between dataset names and their corresponding indices of numeric features (-1) means all are numeric
//and an empty vector means none are numeric
map<std::string, std::vector<int>> catalog;
int nSamples, classNumStates;
double epsilon = 1e-5;
bool discretize;
int num_samples = 0;
bool shuffle = false;
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
private:
std::string to_string()
{
std::string features_ = "";
for (auto& f : features) {
features_ += f + " ";
}
std::string states_ = "";
for (auto& s : states) {
states_ += s.first + " ";
for (auto& v : s.second) {
states_ += std::to_string(v) + " ";
}
states_ += "\n";
}
return "Xt dimensions: " + std::to_string(Xt.size(0)) + " " + std::to_string(Xt.size(1)) + "\n"
"Xv dimensions: " + std::to_string(Xv.size()) + " " + std::to_string(Xv[0].size()) + "\n"
+ "yt dimensions: " + std::to_string(yt.size(0)) + "\n"
+ "yv dimensions: " + std::to_string(yv.size()) + "\n"
+ "X_train dimensions: " + std::to_string(X_train.size(0)) + " " + std::to_string(X_train.size(1)) + "\n"
+ "X_test dimensions: " + std::to_string(X_test.size(0)) + " " + std::to_string(X_test.size(1)) + "\n"
+ "y_train dimensions: " + std::to_string(y_train.size(0)) + "\n"
+ "y_test dimensions: " + std::to_string(y_test.size(0)) + "\n"
+ "features: " + std::to_string(features.size()) + "\n"
+ features_ + "\n"
+ "className: " + className + "\n"
+ "states: " + std::to_string(states.size()) + "\n"
+ "nSamples: " + std::to_string(nSamples) + "\n"
+ "classNumStates: " + std::to_string(classNumStates) + "\n"
+ "states: " + states_ + "\n";
}
std::string trim(const std::string& str)
{
std::string result = str;
result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) {
return !std::isspace(ch);
}));
result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) {
return !std::isspace(ch);
}).base(), result.end());
return result;
}
std::vector<std::string> split(const std::string& text, char delimiter)
{
std::vector<std::string> result;
std::stringstream ss(text);
std::string token;
while (std::getline(ss, token, delimiter)) {
result.push_back(trim(token));
}
return result;
}
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X, const std::vector<bool>& is_numeric);
void loadDataset(const std::string& name, bool class_last);
map<std::string, std::vector<int>> loadCatalog();
};
#endif //TEST_UTILS_H