Remove platformUtils and split Datasets & Dataset
This commit is contained in:
parent bb423da42f
commit 66ec1b343b
.vscode/launch.json (vendored), 16 lines changed
@@ -22,26 +22,24 @@
             "type": "lldb",
             "request": "launch",
             "name": "experiment",
-            "program": "${workspaceFolder}/build/src/Platform/main",
+            "program": "${workspaceFolder}/build/src/Platform/b_main",
             "args": [
                 "-m",
-                "BoostAODE",
-                "-p",
-                "/Users/rmontanana/Code/discretizbench/datasets",
+                "TAN",
                 "--stratified",
                 "-d",
-                "mfeat-morphological",
+                "zoo",
                 "--discretize"
                 // "--hyperparameters",
                 // "{\"repeatSparent\": true, \"maxModels\": 12}"
             ],
-            "cwd": "/Users/rmontanana/Code/discretizbench",
+            "cwd": "/Users/rmontanana/Code/odtebench",
         },
         {
             "type": "lldb",
             "request": "launch",
             "name": "best",
-            "program": "${workspaceFolder}/build/src/Platform/best",
+            "program": "${workspaceFolder}/build/src/Platform/b_best",
             "args": [
                 "-m",
                 "BoostAODE",
@@ -55,7 +53,7 @@
             "type": "lldb",
             "request": "launch",
             "name": "manage",
-            "program": "${workspaceFolder}/build/src/Platform/manage",
+            "program": "${workspaceFolder}/build/src/Platform/b_manage",
             "args": [
                 "-n",
                 "20"
@@ -66,7 +64,7 @@
             "type": "lldb",
             "request": "launch",
             "name": "list",
-            "program": "${workspaceFolder}/build/src/Platform/list",
+            "program": "${workspaceFolder}/build/src/Platform/b_list",
             "args": [],
             "cwd": "/Users/rmontanana/Code/discretizbench",
         },
@@ -5,9 +5,9 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
 include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
-add_executable(b_main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
+add_executable(b_main main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc)
-add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc ExcelFile.cc)
+add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
-add_executable(b_list list.cc platformUtils Datasets.cc)
+add_executable(b_list list.cc Datasets.cc Dataset.cc)
 add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc)
 target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
src/Platform/Dataset.cc (new file, 225 lines added)
@@ -0,0 +1,225 @@
+#include "Dataset.h"
+#include "ArffFiles.h"
+#include <fstream>
+namespace platform {
+    Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
+    {
+    }
+    string Dataset::getName() const
+    {
+        return name;
+    }
+    string Dataset::getClassName() const
+    {
+        return className;
+    }
+    vector<string> Dataset::getFeatures() const
+    {
+        if (loaded) {
+            return features;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    int Dataset::getNFeatures() const
+    {
+        if (loaded) {
+            return n_features;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    int Dataset::getNSamples() const
+    {
+        if (loaded) {
+            return n_samples;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    map<string, vector<int>> Dataset::getStates() const
+    {
+        if (loaded) {
+            return states;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
+    {
+        if (loaded) {
+            return { Xv, yv };
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
+    {
+        if (loaded) {
+            return { Xd, yv };
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
+    {
+        if (loaded) {
+            buildTensors();
+            return { X, y };
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    void Dataset::load_csv()
+    {
+        ifstream file(path + "/" + name + ".csv");
+        if (file.is_open()) {
+            string line;
+            getline(file, line);
+            vector<string> tokens = split(line, ',');
+            features = vector<string>(tokens.begin(), tokens.end() - 1);
+            if (className == "-1") {
+                className = tokens.back();
+            }
+            for (auto i = 0; i < features.size(); ++i) {
+                Xv.push_back(vector<float>());
+            }
+            while (getline(file, line)) {
+                tokens = split(line, ',');
+                for (auto i = 0; i < features.size(); ++i) {
+                    Xv[i].push_back(stof(tokens[i]));
+                }
+                yv.push_back(stoi(tokens.back()));
+            }
+            file.close();
+        } else {
+            throw invalid_argument("Unable to open dataset file.");
+        }
+    }
+    void Dataset::computeStates()
+    {
+        for (int i = 0; i < features.size(); ++i) {
+            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
+            auto item = states.at(features[i]);
+            iota(begin(item), end(item), 0);
+        }
+        states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
+        iota(begin(states.at(className)), end(states.at(className)), 0);
+    }
+    void Dataset::load_arff()
+    {
+        auto arff = ArffFiles();
+        arff.load(path + "/" + name + ".arff", className);
+        // Get Dataset X, y
+        Xv = arff.getX();
+        yv = arff.getY();
+        // Get className & Features
+        className = arff.getClassName();
+        auto attributes = arff.getAttributes();
+        transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
+    }
+    vector<string> tokenize(string line)
+    {
+        vector<string> tokens;
+        for (auto i = 0; i < line.size(); ++i) {
+            if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
+                string token = line.substr(0, i);
+                tokens.push_back(token);
+                line.erase(line.begin(), line.begin() + i + 1);
+                i = 0;
+                while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n')
+                    line.erase(line.begin(), line.begin() + i + 1);
+            }
+        }
+        if (line.size() > 0) {
+            tokens.push_back(line);
+        }
+        return tokens;
+    }
+    void Dataset::load_rdata()
+    {
+        ifstream file(path + "/" + name + "_R.dat");
+        if (file.is_open()) {
+            string line;
+            getline(file, line);
+            line = ArffFiles::trim(line);
+            vector<string> tokens = tokenize(line);
+            transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
+            if (className == "-1") {
+                className = ArffFiles::trim(tokens.back());
+            }
+            for (auto i = 0; i < features.size(); ++i) {
+                Xv.push_back(vector<float>());
+            }
+            while (getline(file, line)) {
+                tokens = tokenize(line);
+                // We have to skip the first token, which is the instance number.
+                for (auto i = 1; i < features.size() + 1; ++i) {
+                    const float value = stof(tokens[i]);
+                    Xv[i - 1].push_back(value);
+                }
+                yv.push_back(stoi(tokens.back()));
+            }
+            file.close();
+        } else {
+            throw invalid_argument("Unable to open dataset file.");
+        }
+    }
+    void Dataset::load()
+    {
+        if (loaded) {
+            return;
+        }
+        if (fileType == CSV) {
+            load_csv();
+        } else if (fileType == ARFF) {
+            load_arff();
+        } else if (fileType == RDATA) {
+            load_rdata();
+        }
+        if (discretize) {
+            Xd = discretizeDataset(Xv, yv);
+            computeStates();
+        }
+        n_samples = Xv[0].size();
+        n_features = Xv.size();
+        loaded = true;
+    }
+    void Dataset::buildTensors()
+    {
+        if (discretize) {
+            X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
+        } else {
+            X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
+        }
+        for (int i = 0; i < features.size(); ++i) {
+            if (discretize) {
+                X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
+            } else {
+                X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
+            }
+        }
+        y = torch::tensor(yv, torch::kInt32);
+    }
+    vector<mdlp::labels_t> Dataset::discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
+    {
+        vector<mdlp::labels_t> Xd;
+        auto fimdlp = mdlp::CPPFImdlp();
+        for (int i = 0; i < X.size(); i++) {
+            fimdlp.fit(X[i], y);
+            mdlp::labels_t& xd = fimdlp.transform(X[i]);
+            Xd.push_back(xd);
+        }
+        return Xd;
+    }
+    vector<string> Dataset::split(const string& text, char delimiter)
+    {
+        vector<string> result;
+        stringstream ss(text);
+        string token;
+        while (getline(ss, token, delimiter)) {
+            result.push_back(token);
+        }
+        return result;
+    }
+}
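As a quick illustration of the two string helpers defined above (the feature names and values below are invented for the example, not taken from the repository):

    #include "Dataset.h"

    // Sketch only: Dataset::split is a static member declared in Dataset.h,
    // so it can be called without a Dataset instance.
    void splitExample()
    {
        auto fields = platform::Dataset::split("sepallength,sepalwidth,class", ',');
        // fields == { "sepallength", "sepalwidth", "class" }

        // tokenize() is file-local to Dataset.cc: load_rdata() uses it to break a
        // whitespace-separated line such as "1 5.1 3.5 0" into { "1", "5.1", "3.5", "0" }
        // and then skips the leading instance number when filling Xv.
    }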
src/Platform/Dataset.h (new file, 80 lines added)
@@ -0,0 +1,80 @@
+#ifndef DATASET_H
+#define DATASET_H
+#include <torch/torch.h>
+#include <map>
+#include <vector>
+#include <string>
+#include "CPPFImdlp.h"
+namespace platform {
+    using namespace std;
+
+    enum fileType_t { CSV, ARFF, RDATA };
+    class SourceData {
+    public:
+        SourceData(string source)
+        {
+            if (source == "Surcov") {
+                path = "datasets/";
+                fileType = CSV;
+            } else if (source == "Arff") {
+                path = "datasets/";
+                fileType = ARFF;
+            } else if (source == "Tanveer") {
+                path = "data/";
+                fileType = RDATA;
+            } else {
+                throw invalid_argument("Unknown source.");
+            }
+        }
+        string getPath()
+        {
+            return path;
+        }
+        fileType_t getFileType()
+        {
+            return fileType;
+        }
+    private:
+        string path;
+        fileType_t fileType;
+    };
+    class Dataset {
+    private:
+        string path;
+        string name;
+        fileType_t fileType;
+        string className;
+        int n_samples{ 0 }, n_features{ 0 };
+        vector<string> features;
+        map<string, vector<int>> states;
+        bool loaded;
+        bool discretize;
+        torch::Tensor X, y;
+        vector<vector<float>> Xv;
+        vector<vector<int>> Xd;
+        vector<int> yv;
+        void buildTensors();
+        void load_csv();
+        void load_arff();
+        void load_rdata();
+        void computeStates();
+        vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y);
+    public:
+        Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
+        explicit Dataset(const Dataset&);
+        static vector<string> split(const string& text, char delimiter);
+        string getName() const;
+        string getClassName() const;
+        vector<string> getFeatures() const;
+        map<string, vector<int>> getStates() const;
+        pair<vector<vector<float>>&, vector<int>&> getVectors();
+        pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
+        pair<torch::Tensor&, torch::Tensor&> getTensors();
+        int getNFeatures() const;
+        int getNSamples() const;
+        void load();
+        const bool inline isLoaded() const { return loaded; };
+    };
+};
+
+#endif
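A minimal usage sketch of the two classes declared above; the dataset name "iris" and the class attribute name "class" are illustrative only, not taken from this commit:

    #include <iostream>
    #include "Dataset.h"

    int main()
    {
        // SourceData maps a source label to a folder and a file type ("Arff" -> "datasets/", ARFF).
        auto source = platform::SourceData("Arff");
        auto data = platform::Dataset(source.getPath(), "iris", "class", true, source.getFileType());
        data.load();                      // reads the ARFF file and, with discretize = true, fills Xd and states
        auto [X, y] = data.getTensors();  // X is laid out (n_features x n_samples); y is kInt32
        std::cout << data.getNFeatures() << " features, " << data.getNSamples() << " samples" << std::endl;
        return 0;
    }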
@@ -1,6 +1,4 @@
 #include "Datasets.h"
-#include "platformUtils.h"
-#include "ArffFiles.h"
 #include <fstream>
 namespace platform {
     void Datasets::load()
@@ -15,7 +13,7 @@ namespace platform {
             if (line.empty() || line[0] == '#') {
                 continue;
             }
-            vector<string> tokens = split(line, ',');
+            vector<string> tokens = Dataset::split(line, ',');
             string name = tokens[0];
             string className;
             try {
@@ -129,203 +127,4 @@ namespace platform {
     {
         return datasets.find(name) != datasets.end();
     }
     [199 removed lines elided: the Dataset copy constructor, the getters, load_csv(), computeStates(), load_arff(), tokenize(), load_rdata(), load() and buildTensors() definitions that used to live in this file; the same code now appears in src/Platform/Dataset.cc, shown above]
 }
@@ -1,76 +1,8 @@
 #ifndef DATASETS_H
 #define DATASETS_H
-#include <torch/torch.h>
+#include "Dataset.h"
-#include <map>
-#include <vector>
-#include <string>
 namespace platform {
     using namespace std;
     [removed lines elided: the fileType_t enum and the SourceData and Dataset class declarations that used to be here; they now live in src/Platform/Dataset.h, shown above]
     class Datasets {
     private:
         string path;
@@ -4,7 +4,7 @@
 #include <map>
 #include <fstream>
 #include <sstream>
-#include "platformUtils.h"
+#include "Dataset.h"
 namespace platform {
     class DotEnv {
     private:
@@ -51,7 +51,7 @@ namespace platform {
            auto seeds_str = env["seeds"];
            seeds_str = trim(seeds_str);
            seeds_str = seeds_str.substr(1, seeds_str.size() - 2);
-           auto seeds_str_split = split(seeds_str, ',');
+           auto seeds_str_split = Dataset::split(seeds_str, ',');
            transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
                return stoi(str);
            });
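For reference, the call above turns a bracketed seeds entry into integers roughly as follows; parseSeeds is a hypothetical helper and the .env value is made up:

    #include <string>
    #include <vector>
    #include "Dataset.h"

    // Hypothetical .env entry: seeds=[271,101,17]
    std::vector<int> parseSeeds(std::string seeds_str)
    {
        seeds_str = seeds_str.substr(1, seeds_str.size() - 2);     // strip the brackets -> "271,101,17"
        std::vector<int> seeds;
        for (const auto& s : platform::Dataset::split(seeds_str, ','))
            seeds.push_back(std::stoi(s));                         // {271, 101, 17}
        return seeds;
    }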
@@ -102,12 +102,12 @@
         cout << data.dump(4) << endl;
     }
 
-    void Experiment::go(vector<string> filesToProcess, const string& path)
+    void Experiment::go(vector<string> filesToProcess)
     {
         cout << "*** Starting experiment: " << title << " ***" << endl;
         for (auto fileName : filesToProcess) {
             cout << "- " << setw(20) << left << fileName << " " << right << flush;
-            cross_validation(path, fileName);
+            cross_validation(fileName);
             cout << endl;
         }
     }
@@ -132,7 +132,7 @@
         cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
 
     }
-    void Experiment::cross_validation(const string& path, const string& fileName)
+    void Experiment::cross_validation(const string& fileName)
     {
         auto env = platform::DotEnv();
         auto datasets = platform::Datasets(discretized, env.get("source_data"));
@@ -108,8 +108,8 @@
         Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
         string get_file_name();
         void save(const string& path);
-        void cross_validation(const string& path, const string& fileName);
+        void cross_validation(const string& fileName);
-        void go(vector<string> filesToProcess, const string& path);
+        void go(vector<string> filesToProcess);
         void show();
         void report();
     };
@@ -4,7 +4,6 @@
 namespace platform {
     class Paths {
     public:
-        static std::string datasets() { return "datasets/"; }
         static std::string results() { return "results/"; }
         static std::string excel() { return "excel/"; }
     };
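With Paths::datasets() removed, the data folder is obtained through SourceData (declared in Dataset.h above); a small sketch, with dataFolder as a hypothetical helper and the source labels taken from that class:

    #include <string>
    #include "Dataset.h"

    std::string dataFolder(const std::string& source_data)
    {
        // "Surcov" and "Arff" map to "datasets/", "Tanveer" maps to "data/"; anything else throws.
        return platform::SourceData(source_data).getPath();
    }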
@@ -1,5 +1,4 @@
 #include <filesystem>
-#include "platformUtils.h"
 #include "Results.h"
 #include "ReportConsole.h"
 #include "ReportExcel.h"
@@ -1,7 +1,6 @@
 #include <iostream>
 #include <argparse/argparse.hpp>
 #include <nlohmann/json.hpp>
-#include "platformUtils.h"
 #include "Experiment.h"
 #include "Datasets.h"
 #include "DotEnv.h"
@@ -19,9 +18,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
     argparse::ArgumentParser program("main");
     program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
     program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
-    program.add_argument("-p", "--path")
-        .help("folder where the data files are located, default")
-        .default_value(string{ platform::Paths::datasets() });
     program.add_argument("-m", "--model")
         .help("Model to use " + platform::Models::instance()->toString())
         .action([](const std::string& value) {
@@ -55,13 +51,11 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
     try {
         program.parse_args(argc, argv);
         auto file_name = program.get<string>("dataset");
-        auto path = program.get<string>("path");
         auto model_name = program.get<string>("model");
         auto discretize_dataset = program.get<bool>("discretize");
         auto stratified = program.get<bool>("stratified");
         auto n_folds = program.get<int>("folds");
         auto seeds = program.get<vector<int>>("seeds");
-        auto complete_file_name = path + file_name + ".arff";
         auto title = program.get<string>("title");
         auto hyperparameters = program.get<string>("hyperparameters");
         auto saveResults = program.get<bool>("save");
@@ -81,7 +75,6 @@ int main(int argc, char** argv)
 {
     auto program = manageArguments(argc, argv);
     auto file_name = program.get<string>("dataset");
-    auto path = program.get<string>("path");
     auto model_name = program.get<string>("model");
     auto discretize_dataset = program.get<bool>("discretize");
     auto stratified = program.get<bool>("stratified");
@@ -120,7 +113,7 @@ int main(int argc, char** argv)
     }
     platform::Timer timer;
     timer.start();
-    experiment.go(filesToTest, path);
+    experiment.go(filesToTest);
     experiment.setDuration(timer.getDuration());
     if (saveResults) {
         experiment.save(platform::Paths::results());
@@ -1,6 +1,5 @@
 #include <iostream>
 #include <argparse/argparse.hpp>
-#include "platformUtils.h"
 #include "Paths.h"
 #include "Results.h"
 
@@ -1,110 +0,0 @@
-#include "platformUtils.h"
-#include "Paths.h"
-
-using namespace torch;
-
-vector<string> split(const string& text, char delimiter)
-{
-    vector<string> result;
-    stringstream ss(text);
-    string token;
-    while (getline(ss, token, delimiter)) {
-        result.push_back(token);
-    }
-    return result;
-}
-
-pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
-{
-    vector<mdlp::labels_t> Xd;
-    map<string, int> maxes;
-    auto fimdlp = mdlp::CPPFImdlp();
-    for (int i = 0; i < X.size(); i++) {
-        fimdlp.fit(X[i], y);
-        mdlp::labels_t& xd = fimdlp.transform(X[i]);
-        maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
-        Xd.push_back(xd);
-    }
-    return { Xd, maxes };
-}
-
-vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
-{
-    vector<mdlp::labels_t> Xd;
-    auto fimdlp = mdlp::CPPFImdlp();
-    for (int i = 0; i < X.size(); i++) {
-        fimdlp.fit(X[i], y);
-        mdlp::labels_t& xd = fimdlp.transform(X[i]);
-        Xd.push_back(xd);
-    }
-    return Xd;
-}
-
-bool file_exists(const string& name)
-{
-    if (FILE* file = fopen(name.c_str(), "r")) {
-        fclose(file);
-        return true;
-    } else {
-        return false;
-    }
-}
-
-tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset)
-{
-    auto handler = ArffFiles();
-    handler.load(path + static_cast<string>(name) + ".arff", class_last);
-    // Get Dataset X, y
-    vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
-    vector<string> features;
-    auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    Tensor Xd;
-    auto states = map<string, vector<int>>();
-    if (discretize_dataset) {
-        auto Xr = discretizeDataset(X, y);
-        Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
-        for (int i = 0; i < features.size(); ++i) {
-            states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
-            auto item = states.at(features[i]);
-            iota(begin(item), end(item), 0);
-            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
-        }
-        states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
-        iota(begin(states.at(className)), end(states.at(className)), 0);
-    } else {
-        Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
-        for (int i = 0; i < features.size(); ++i) {
-            Xd.index_put_({ "...", i }, torch::tensor(X[i]));
-        }
-    }
-    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
-}
-
-tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
-{
-    auto handler = ArffFiles();
-    handler.load(platform::Paths::datasets() + static_cast<string>(name) + ".arff");
-    // Get Dataset X, y
-    vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
-    vector<string> features;
-    auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    // Discretize Dataset
-    vector<mdlp::labels_t> Xd;
-    map<string, int> maxes;
-    tie(Xd, maxes) = discretize(X, y, features);
-    maxes[className] = *max_element(y.begin(), y.end()) + 1;
-    map<string, vector<int>> states;
-    for (auto feature : features) {
-        states[feature] = vector<int>(maxes[feature]);
-    }
-    states[className] = vector<int>(maxes[className]);
-    return { Xd, y, features, className, states };
-}
@@ -1,20 +0,0 @@
-#ifndef PLATFORM_UTILS_H
-#define PLATFORM_UTILS_H
-#include <torch/torch.h>
-#include <string>
-#include <vector>
-#include <map>
-#include <tuple>
-#include "ArffFiles.h"
-#include "CPPFImdlp.h"
-using namespace std;
-
-bool file_exists(const std::string& name);
-vector<string> split(const string& text, char delimiter);
-pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
-vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y);
-pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, const string& className);
-tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name);
-tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset);
-map<string, vector<int>> get_states(vector<string>& features, string className, map<string, int>& maxes);
-#endif //PLATFORM_UTILS_H
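Of the helpers declared here, split() survives as a static member of platform::Dataset and discretizeDataset() becomes a private Dataset method; the remaining loaders are deleted with this header. A before/after sketch of a typical call, with splitLine as a hypothetical caller:

    #include <string>
    #include <vector>
    #include "Dataset.h"

    std::vector<std::string> splitLine(const std::string& line)
    {
        // Before this commit (free function from platformUtils.h):  return split(line, ',');
        // After this commit (static member declared in Dataset.h):
        return platform::Dataset::split(line, ',');
    }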
@@ -9,7 +9,6 @@
 #include "TAN.h"
 #include "SPODE.h"
 #include "AODE.h"
-#include "platformUtils.h"
 
 TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
 {
@@ -3,7 +3,6 @@
 #include <catch2/generators/catch_generators.hpp>
 #include <string>
 #include "KDB.h"
-#include "platformUtils.h"
 
 TEST_CASE("Test Bayesian Network")
 {
@@ -5,7 +5,7 @@ if(ENABLE_TESTING)
     include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
     include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
     include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
-    set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc ${BayesNet_SOURCES})
+    set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCES})
     add_executable(${TEST_MAIN} ${TEST_SOURCES})
     target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
     add_test(NAME ${TEST_MAIN} COMMAND ${TEST_MAIN})