2024-04-11 16:02:49 +00:00
|
|
|
// ***************************************************************
|
|
|
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
|
|
|
// SPDX-FileType: SOURCE
|
|
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
// ***************************************************************
|
|
|
|
|
2023-10-04 21:19:23 +00:00
|
|
|
#ifndef TEST_UTILS_H
|
|
|
|
#define TEST_UTILS_H
|
|
|
|
#include <torch/torch.h>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <map>
|
2024-01-07 18:58:22 +00:00
|
|
|
#include <tuple>
|
2024-05-21 11:50:19 +00:00
|
|
|
#include <ArffFiles.hpp>
|
2024-11-23 17:22:41 +00:00
|
|
|
#include <CPPFImdlp.h>
|
2024-04-30 00:11:14 +00:00
|
|
|
#include <folding.hpp>
|
2024-06-11 09:40:45 +00:00
|
|
|
#include <bayesnet/network/Network.h>
|
2023-10-04 21:19:23 +00:00
|
|
|
|
|
|
|
|
2023-10-06 15:08:54 +00:00
|
|
|
class RawDatasets {
|
|
|
|
public:
|
2024-04-30 09:02:23 +00:00
|
|
|
RawDatasets(const std::string& file_name, bool discretize_, int num_samples_ = 0, bool shuffle_ = false, bool class_last = true, bool debug = false);
|
2023-10-06 15:08:54 +00:00
|
|
|
torch::Tensor Xt, yt, dataset, weights;
|
2024-04-30 00:11:14 +00:00
|
|
|
torch::Tensor X_train, y_train, X_test, y_test;
|
2023-11-08 17:45:35 +00:00
|
|
|
std::vector<vector<int>> Xv;
|
|
|
|
std::vector<int> yv;
|
2024-04-30 00:11:14 +00:00
|
|
|
std::vector<double> weightsv;
|
|
|
|
std::vector<string> features;
|
|
|
|
std::string className;
|
|
|
|
map<std::string, std::vector<int>> states;
|
2023-10-06 15:08:54 +00:00
|
|
|
int nSamples, classNumStates;
|
|
|
|
double epsilon = 1e-5;
|
2024-04-30 00:11:14 +00:00
|
|
|
bool discretize;
|
2024-04-30 09:02:23 +00:00
|
|
|
int num_samples = 0;
|
2024-04-30 00:11:14 +00:00
|
|
|
bool shuffle = false;
|
2024-06-13 13:04:15 +00:00
|
|
|
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
|
2024-04-30 00:11:14 +00:00
|
|
|
private:
|
|
|
|
std::string to_string()
|
|
|
|
{
|
|
|
|
std::string features_ = "";
|
|
|
|
for (auto& f : features) {
|
|
|
|
features_ += f + " ";
|
|
|
|
}
|
|
|
|
std::string states_ = "";
|
|
|
|
for (auto& s : states) {
|
|
|
|
states_ += s.first + " ";
|
|
|
|
for (auto& v : s.second) {
|
|
|
|
states_ += std::to_string(v) + " ";
|
|
|
|
}
|
|
|
|
states_ += "\n";
|
|
|
|
}
|
|
|
|
return "Xt dimensions: " + std::to_string(Xt.size(0)) + " " + std::to_string(Xt.size(1)) + "\n"
|
|
|
|
"Xv dimensions: " + std::to_string(Xv.size()) + " " + std::to_string(Xv[0].size()) + "\n"
|
|
|
|
+ "yt dimensions: " + std::to_string(yt.size(0)) + "\n"
|
|
|
|
+ "yv dimensions: " + std::to_string(yv.size()) + "\n"
|
|
|
|
+ "X_train dimensions: " + std::to_string(X_train.size(0)) + " " + std::to_string(X_train.size(1)) + "\n"
|
|
|
|
+ "X_test dimensions: " + std::to_string(X_test.size(0)) + " " + std::to_string(X_test.size(1)) + "\n"
|
|
|
|
+ "y_train dimensions: " + std::to_string(y_train.size(0)) + "\n"
|
|
|
|
+ "y_test dimensions: " + std::to_string(y_test.size(0)) + "\n"
|
|
|
|
+ "features: " + std::to_string(features.size()) + "\n"
|
|
|
|
+ features_ + "\n"
|
|
|
|
+ "className: " + className + "\n"
|
|
|
|
+ "states: " + std::to_string(states.size()) + "\n"
|
|
|
|
+ "nSamples: " + std::to_string(nSamples) + "\n"
|
|
|
|
+ "classNumStates: " + std::to_string(classNumStates) + "\n"
|
|
|
|
+ "states: " + states_ + "\n";
|
|
|
|
}
|
|
|
|
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X);
|
2024-04-30 09:02:23 +00:00
|
|
|
void loadDataset(const std::string& name, bool class_last);
|
2023-10-06 15:08:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif //TEST_UTILS_H
|