Adding Datasets management
commit bc214a496c (parent 3e954ba841)
.vscode/settings.json (vendored): 3 changed lines
@@ -101,7 +101,8 @@
     "*.ipp": "cpp",
     "cassert": "cpp",
     "charconv": "cpp",
-    "source_location": "cpp"
+    "source_location": "cpp",
+    "ranges": "cpp"
   },
   "cmake.configureOnOpen": false,
   "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
ArffFiles.cc
@@ -42,7 +42,7 @@ vector<int>& ArffFiles::getY()
     return y;
 }
 
-void ArffFiles::load(const string& fileName, bool classLast)
+void ArffFiles::loadCommon(string fileName)
 {
     ifstream file(fileName);
     if (!file.is_open()) {
@@ -74,24 +74,50 @@ void ArffFiles::load(const string& fileName, bool classLast)
     file.close();
     if (attributes.empty())
         throw invalid_argument("No attributes found");
+}
+
+void ArffFiles::load(const string& fileName, bool classLast)
+{
+    int labelIndex;
+    loadCommon(fileName);
     if (classLast) {
         className = get<0>(attributes.back());
         classType = get<1>(attributes.back());
         attributes.pop_back();
+        labelIndex = static_cast<int>(attributes.size());
     } else {
         className = get<0>(attributes.front());
         classType = get<1>(attributes.front());
         attributes.erase(attributes.begin());
+        labelIndex = 0;
     }
-    generateDataset(classLast);
+    generateDataset(labelIndex);
+}
+void ArffFiles::load(const string& fileName, const string& name)
+{
+    int labelIndex;
+    loadCommon(fileName);
+    bool found = false;
+    for (int i = 0; i < attributes.size(); ++i) {
+        if (attributes[i].first == name) {
+            className = get<0>(attributes[i]);
+            classType = get<1>(attributes[i]);
+            attributes.erase(attributes.begin() + i);
+            labelIndex = i;
+            found = true;
+            break;
+        }
+    }
+    if (!found) {
+        throw invalid_argument("Class name not found");
+    }
+    generateDataset(labelIndex);
 }
 
-void ArffFiles::generateDataset(bool classLast)
+void ArffFiles::generateDataset(int labelIndex)
 {
     X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
     auto yy = vector<string>(lines.size(), "");
-    int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
     for (size_t i = 0; i < lines.size(); i++) {
         stringstream ss(lines[i]);
         string value;
ArffFiles.h
@@ -14,12 +14,12 @@ private:
     string classType;
     vector<vector<float>> X;
    vector<int> y;
 
-    void generateDataset(bool);
+    void generateDataset(int);
+    void loadCommon(string);
 public:
     ArffFiles();
     void load(const string&, bool = true);
+    void load(const string&, const string&);
     vector<string> getLines() const;
     unsigned long int getSize() const;
     string getClassName() const;
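A minimal sketch of the two load overloads after this change (the ARFF file name is hypothetical; getX()/getY() are the accessors used by Datasets.cc below):

    ArffFiles arff;
    arff.load("iris.arff", true);           // class attribute taken positionally (last by default)
    ArffFiles arffByName;
    arffByName.load("iris.arff", "class");  // class attribute selected by name, wherever it appears
    vector<vector<float>>& X = arff.getX();
    vector<int>& y = arff.getY();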
BaseClassifier.h
@@ -17,6 +17,7 @@ namespace bayesnet {
         vector<string> virtual show() = 0;
         vector<string> virtual graph(string title = "") = 0;
         virtual ~BaseClassifier() = default;
+        const string inline getVersion() const { return "0.1.0"; };
     };
 }
 #endif
Network.h
@@ -7,7 +7,7 @@
 namespace bayesnet {
     class Network {
     private:
-        map<string, std::unique_ptr<Node>> nodes;
+        map<string, unique_ptr<Node>> nodes;
         map<string, vector<int>> dataset;
         bool fitted;
         float maxThreads;
TAN.cc
@@ -3,7 +3,7 @@
 namespace bayesnet {
     using namespace torch;
 
-    TAN::TAN() : Classifier(Network(0.1)) {}
+    TAN::TAN() : Classifier(Network()) {}
 
     void TAN::train()
     {
src/Platform/CMakeLists.txt
@@ -4,5 +4,5 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
 include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
 include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
-add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc)
+add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES} ")
src/Platform/Datasets.cc (new file): 252 lines
@@ -0,0 +1,252 @@
+#include "Datasets.h"
+#include "platformUtils.h"
+#include "ArffFiles.h"
+namespace platform {
+    vector<string> split(string text, char delimiter)
+    {
+        vector<string> result;
+        stringstream ss(text);
+        string token;
+        while (getline(ss, token, delimiter)) {
+            result.push_back(token);
+        }
+        return result;
+    }
+    void Datasets::load()
+    {
+        string line;
+        ifstream catalog(path + "/all.txt");
+        if (catalog.is_open()) {
+            while (getline(catalog, line)) {
+                vector<string> tokens = split(line, ',');
+                string name = tokens[0];
+                string className = tokens[1];
+                datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
+            }
+            catalog.close();
+        } else {
+            throw invalid_argument("Unable to open catalog file. [" + path + "/all.txt" + "]");
+        }
+    }
+    Dataset& Datasets::getDataset(string name)
+    {
+        if (datasets.find(name) == datasets.end()) {
+            throw invalid_argument("Dataset not found.");
+        }
+        return *datasets[name];
+    }
+    vector<string> Datasets::getNames()
+    {
+        vector<string> result;
+        for (auto& d : datasets) {
+            result.push_back(d.first);
+        }
+        return result;
+    }
+    vector<string> Datasets::getFeatures(string name)
+    {
+        // Take a reference: copying the Dataset here would discard its loaded state
+        auto& dataset = getDataset(name);
+        if (dataset.isLoaded()) {
+            return dataset.getFeatures();
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    map<string, vector<int>> Datasets::getStates(string name)
+    {
+        auto& dataset = getDataset(name);
+        if (dataset.isLoaded()) {
+            return dataset.getStates();
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name)
+    {
+        // A reference is required: the returned pair holds references into the Dataset
+        auto& dataset = getDataset(name);
+        if (!dataset.isLoaded()) {
+            dataset.load();
+        }
+        return dataset.getVectors();
+    }
+    pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name)
+    {
+        auto& dataset = getDataset(name);
+        if (!dataset.isLoaded()) {
+            dataset.load();
+        }
+        return dataset.getVectorsDiscretized();
+    }
+    pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name)
+    {
+        auto& dataset = getDataset(name);
+        if (!dataset.isLoaded()) {
+            dataset.load();
+        }
+        return dataset.getTensors();
+    }
+    Dataset::Dataset(Dataset& dataset)
+    {
+        path = dataset.path;
+        name = dataset.name;
+        className = dataset.className;
+        n_samples = dataset.n_samples;
+        n_features = dataset.n_features;
+        features = dataset.features;
+        states = dataset.states;
+        loaded = dataset.loaded;
+        discretize = dataset.discretize;
+        X = dataset.X;
+        y = dataset.y;
+        Xv = dataset.Xv;
+        Xd = dataset.Xd;
+        yv = dataset.yv;
+        fileType = dataset.fileType;
+    }
+    string Dataset::getName()
+    {
+        return name;
+    }
+    string Dataset::getClassName()
+    {
+        return className;
+    }
+    vector<string> Dataset::getFeatures()
+    {
+        if (loaded) {
+            return features;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    int Dataset::getNFeatures()
+    {
+        if (loaded) {
+            return n_features;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    int Dataset::getNSamples()
+    {
+        if (loaded) {
+            return n_samples;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    map<string, vector<int>> Dataset::getStates()
+    {
+        if (loaded) {
+            return states;
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
+    {
+        if (loaded) {
+            return { Xv, yv };
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
+    {
+        if (loaded) {
+            return { Xd, yv };
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
+    {
+        if (loaded) {
+            buildTensors();
+            return { X, y };
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    void Dataset::load_csv()
+    {
+        string line;
+        ifstream file(path + "/" + name + ".csv");
+        if (file.is_open()) {
+            getline(file, line);
+            vector<string> tokens = split(line, ',');
+            features = vector<string>(tokens.begin(), tokens.end() - 1);
+            className = tokens.back();
+            for (auto i = 0; i < features.size(); ++i) {
+                Xv.push_back(vector<float>());
+            }
+            while (getline(file, line)) {
+                tokens = split(line, ',');
+                for (auto i = 0; i < features.size(); ++i) {
+                    Xv[i].push_back(stof(tokens[i]));
+                }
+                yv.push_back(stoi(tokens.back()));
+            }
+            file.close();
+        } else {
+            throw invalid_argument("Unable to open dataset file.");
+        }
+    }
+    void Dataset::computeStates()
+    {
+        // States are the lists of possible values, 0..max, for each discretized
+        // feature and for the class; iota fills the state vectors, not the data
+        for (int i = 0; i < features.size(); ++i) {
+            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
+            iota(states[features[i]].begin(), states[features[i]].end(), 0);
+        }
+        states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
+        iota(states[className].begin(), states[className].end(), 0);
+    }
+    void Dataset::load_arff()
+    {
+        auto arff = ArffFiles();
+        arff.load(path + "/" + name + ".arff", className);
+        // Get Dataset X, y
+        Xv = arff.getX();
+        yv = arff.getY();
+        // Get className & Features (assign the members; locals would be discarded)
+        className = arff.getClassName();
+        for (auto feature : arff.getAttributes()) {
+            features.push_back(feature.first);
+        }
+    }
+    void Dataset::load()
+    {
+        if (loaded) {
+            return;
+        }
+        if (fileType == CSV) {
+            load_csv();
+        } else if (fileType == ARFF) {
+            load_arff();
+        }
+        if (discretize) {
+            Xd = discretizeDataset(Xv, yv);
+            computeStates();
+        }
+        // Dimensions are known once the raw vectors are in, discretized or not
+        n_samples = Xv[0].size();
+        n_features = Xv.size();
+        loaded = true;
+    }
+    void Dataset::buildTensors()
+    {
+        if (discretize) {
+            X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
+        } else {
+            X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
+        }
+        for (int i = 0; i < features.size(); ++i) {
+            if (discretize) {
+                X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
+            } else {
+                X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
+            }
+        }
+        // y only needs to be built once, after the loop
+        y = torch::tensor(yv, torch::kInt32);
+    }
+}
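buildTensors() lays X out feature-major: X has shape (n_features, n_samples) and row i holds every sample of feature i, while y holds the class labels. A minimal sketch of the accessors above ("iris" is a hypothetical catalog entry):

    auto datasets = platform::Datasets("../../data", true, platform::ARFF);
    auto [X, y] = datasets.getTensors("iris");  // loads on demand; X is kInt32 since discretize == true
    auto firstFeature = X.index({ 0, "..." });  // all samples of the first feature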
src/Platform/Datasets.h (new file): 63 lines
@@ -0,0 +1,63 @@
+#ifndef DATASETS_H
+#define DATASETS_H
+#include <torch/torch.h>
+#include <map>
+#include <vector>
+#include <string>
+namespace platform {
+    using namespace std;
+    enum fileType_t { CSV, ARFF };
+    class Dataset {
+    private:
+        string path;
+        string name;
+        fileType_t fileType;
+        string className;
+        int n_samples, n_features;
+        vector<string> features;
+        map<string, vector<int>> states;
+        bool loaded;
+        bool discretize;
+        torch::Tensor X, y;
+        vector<vector<float>> Xv;
+        vector<vector<int>> Xd;
+        vector<int> yv;
+        void buildTensors();
+        void load_csv();
+        void load_arff();
+        void computeStates();
+    public:
+        Dataset(string path, string name, string className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
+        Dataset(Dataset&);
+        string getName();
+        string getClassName();
+        vector<string> getFeatures();
+        map<string, vector<int>> getStates();
+        pair<vector<vector<float>>&, vector<int>&> getVectors();
+        pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
+        pair<torch::Tensor&, torch::Tensor&> getTensors();
+        int getNFeatures();
+        int getNSamples();
+        void load();
+        const bool inline isLoaded() const { return loaded; };
+    };
+    class Datasets {
+    private:
+        string path;
+        fileType_t fileType;
+        map<string, unique_ptr<Dataset>> datasets;
+        bool discretize;
+        void load(); // Loads the list of datasets
+    public:
+        Datasets(string path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
+        Dataset& getDataset(string name);
+        vector<string> getNames();
+        vector<string> getFeatures(string name);
+        map<string, vector<int>> getStates(string name);
+        pair<vector<vector<float>>&, vector<int>&> getVectors(string name);
+        pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name);
+        pair<torch::Tensor&, torch::Tensor&> getTensors(string name);
+    };
+};
+
+#endif
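Datasets expects a catalog file all.txt in the data folder, one "name,className" pair per line (see Datasets::load() in Datasets.cc above). A hypothetical catalog and usage sketch, assuming matching .arff files exist:

    # ../../data/all.txt
    iris,class
    diabetes,class

    auto datasets = platform::Datasets("../../data", true, platform::ARFF);
    for (const auto& name : datasets.getNames()) {
        auto& dataset = datasets.getDataset(name);
        dataset.load();
        cout << name << ": " << dataset.getNSamples() << " samples, "
             << dataset.getNFeatures() << " features" << endl;
    }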
Experiment.cc
@@ -101,6 +101,7 @@ namespace platform {
         Timer train_timer, test_timer;
         for (int i = 0; i < k; i++) {
             bayesnet::BaseClassifier* model = classifiers[model_name];
+            result.setModelVersion(model->getVersion());
             train_timer.start();
             auto [train, test] = fold->getFold(i);
             auto train_t = torch::tensor(train);
Experiment.h
@@ -24,7 +24,7 @@ namespace platform {
     };
     class Result {
     private:
-        string dataset, hyperparameters;
+        string dataset, hyperparameters, model_version;
         int samples, features, classes;
         float score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std;
         float nodes, leaves, depth;
@@ -46,6 +46,7 @@ namespace platform {
         Result& setNodes(float nodes) { this->nodes = nodes; return *this; }
         Result& setLeaves(float leaves) { this->leaves = leaves; return *this; }
         Result& setDepth(float depth) { this->depth = depth; return *this; }
+        Result& setModelVersion(string model_version) { this->model_version = model_version; return *this; }
         const float get_score_train() const { return score_train; }
         float get_score_test() { return score_test; }
         const string& getDataset() const { return dataset; }
@@ -64,6 +65,7 @@ namespace platform {
         const float getNodes() const { return nodes; }
         const float getLeaves() const { return leaves; }
         const float getDepth() const { return depth; }
+        const string& getModelVersion() const { return model_version; }
     };
     class Experiment {
     private:
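Result's setters each return *this, so cross-validation metadata can be recorded fluently; this is how the new model_version travels from BaseClassifier::getVersion() through Result into Experiment. An illustrative sketch built from names in the hunks above (the numeric values are made up):

    result.setDataset(file_name)
          .setModelVersion(model->getVersion())
          .setNodes(21).setLeaves(12).setDepth(4);
    experiment.addResult(result);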
main.cc
@@ -1,49 +1,17 @@
 #include <iostream>
 #include <string>
 #include <torch/torch.h>
 #include <thread>
 #include <argparse/argparse.hpp>
-#include "ArffFiles.h"
-#include "Network.h"
-#include "BayesMetrics.h"
-#include "CPPFImdlp.h"
-#include "KDB.h"
-#include "SPODE.h"
-#include "AODE.h"
-#include "TAN.h"
 #include "platformUtils.h"
 #include "Experiment.h"
 #include "Folding.h"
+#include "Datasets.h"
 
 
 using namespace std;
 
-int main(int argc, char** argv)
+argparse::ArgumentParser manageArguments(int argc, char** argv)
 {
-    map<string, bool> datasets = {
-        {"diabetes", true},
-        {"ecoli", true},
-        {"glass", true},
-        {"iris", true},
-        {"kdd_JapaneseVowels", false},
-        {"letter", true},
-        {"liver-disorders", true},
-        {"mfeat-factors", true},
-    };
-    auto valid_datasets = vector<string>();
-    for (auto dataset : datasets) {
-        valid_datasets.push_back(dataset.first);
-    }
     argparse::ArgumentParser program("BayesNetSample");
     program.add_argument("-d", "--dataset")
-        .help("Dataset file name")
-        .action([valid_datasets](const std::string& value) {
-            if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
-                return value;
-            }
-            throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
-        }
-        );
+        .help("Dataset file name");
     program.add_argument("-p", "--path")
         .help("folder where the data files are located, default")
         .default_value(string{ PATH }
@@ -89,7 +57,7 @@ int main(int argc, char** argv)
     n_folds = program.get<int>("folds");
     seed = program.get<int>("seed");
     complete_file_name = path + file_name + ".arff";
-    class_last = datasets[file_name];
+    class_last = false;//datasets[file_name];
     title = program.get<string>("title");
     if (!file_exists(complete_file_name)) {
         throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
@@ -100,24 +68,54 @@ int main(int argc, char** argv)
         cerr << program;
         exit(1);
     }
+    return program;
+}
+
+int main(int argc, char** argv)
+{
+    auto program = manageArguments(argc, argv);
+    auto file_name = program.get<string>("dataset");
+    auto path = program.get<string>("path");
+    auto model_name = program.get<string>("model");
+    auto discretize_dataset = program.get<bool>("discretize");
+    auto stratified = program.get<bool>("stratified");
+    auto n_folds = program.get<int>("folds");
+    auto seed = program.get<int>("seed");
+    vector<string> filesToProcess;
+    auto datasets = platform::Datasets(path, true, platform::ARFF);
+    if (file_name != "") {
+        filesToProcess.push_back(file_name);
+    } else {
+        filesToProcess = platform::Datasets(path, true, platform::ARFF).getNames();
+    }
+    auto title = program.get<string>("title");
 
     /*
     * Begin Processing
     */
-    auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset);
-    Fold* fold;
-    if (stratified)
-        fold = new StratifiedKFold(n_folds, y, seed);
-    else
-        fold = new KFold(n_folds, y.numel(), seed);
     auto experiment = platform::Experiment();
     experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0");
-    experiment.setDiscretized(discretize_dataset).setModel(model_name).setModelVersion("1...0").setPlatform("BayesNet");
+    experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("BayesNet");
     experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy");
     platform::Timer timer;
     timer.start();
-    auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
-    result.setDataset(file_name);
-    experiment.addResult(result);
+    for (auto fileName : filesToProcess) {
+        cout << "Processing " << fileName << endl;
+        auto [X, y] = datasets.getTensors(fileName);
+        // auto states = datasets.getStates(fileName);
+        // auto features = datasets.getFeatures(fileName);
+        // auto className = datasets.getDataset(fileName).getClassName();
+        // Fold* fold;
+        // if (stratified)
+        //     fold = new StratifiedKFold(n_folds, y, seed);
+        // else
+        //     fold = new KFold(n_folds, y.numel(), seed);
+        // auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
+        // result.setDataset(file_name);
+        // experiment.setModelVersion(result.getModelVersion());
+        // experiment.addResult(result);
+        // delete fold;
+    }
     experiment.setDuration(timer.getDuration());
     experiment.save(path);
     experiment.show();
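With manageArguments() split out, main() now iterates every catalog entry when no dataset is named. A hypothetical invocation (only -d/--dataset and -p/--path are visible in this diff; the remaining long option names are assumptions inferred from the program.get calls):

    ./main -d iris -p ../../data/ --model TAN --folds 5 --seed 17
    ./main -p ../../data/    # no -d given: every dataset listed in all.txt is processed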
platformUtils.h
@@ -12,6 +12,7 @@ const string PATH = "../../data/";
 
 bool file_exists(const std::string& name);
 pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
+vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y);
 pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className);
 tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name);
 tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset);