Complete Experiment

This commit is contained in:
parent bc214a496c
commit 3d8fea7a37

Changed: .vscode/launch.json (vendored, 8 lines changed), plus the source and header files shown in the hunks below.
.vscode/launch.json (vendored)
@@ -23,15 +23,15 @@
             "name": "experiment",
             "program": "${workspaceFolder}/build/src/Platform/main",
             "args": [
-                "-d",
-                "iris",
                 "-m",
                 "TAN",
                 "-p",
-                "../../../data/",
+                "datasets",
                 "--discretize",
                 "-f",
-                "2"
+                "5",
+                "--title",
+                "Debug test"
             ],
             "cwd": "${workspaceFolder}/build/src/Platform",
         },
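
Note: the updated configuration is equivalent to invoking the target as ./main -m TAN -p datasets --discretize -f 5 --title "Debug test". Dropping the "-d", "iris" pair means no single dataset is pinned; together with the new default_value("") for --dataset in main.cc below, the run presumably covers every dataset the platform discovers under the path.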

@@ -2,6 +2,7 @@
 #include <fstream>
 #include <sstream>
 #include <map>
+#include <iostream>
 
 using namespace std;
 
@@ -118,6 +119,7 @@ void ArffFiles::generateDataset(int labelIndex)
 {
     X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
     auto yy = vector<string>(lines.size(), "");
+    auto removeLines = vector<int>(); // Lines with missing values
     for (size_t i = 0; i < lines.size(); i++) {
         stringstream ss(lines[i]);
         string value;
@@ -127,10 +129,20 @@ void ArffFiles::generateDataset(int labelIndex)
             if (pos++ == labelIndex) {
                 yy[i] = value;
             } else {
-                X[xIndex++][i] = stof(value);
+                if (value == "?") {
+                    X[xIndex++][i] = -1;
+                    removeLines.push_back(i);
+                } else
+                    X[xIndex++][i] = stof(value);
             }
         }
     }
+    for (auto i : removeLines) {
+        yy.erase(yy.begin() + i);
+        for (auto& x : X) {
+            x.erase(x.begin() + i);
+        }
+    }
     y = factorize(yy);
 }
 
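Note on the removal loop above: removeLines is filled in ascending order and then traversed front to back, but each erase shifts every later row down by one, so the subsequent stored indices no longer point at the intended lines. A minimal sketch of a safer variant, traversing in reverse so earlier indices stay valid (illustrative only, not part of this commit):

    for (auto it = removeLines.rbegin(); it != removeLines.rend(); ++it) {
        yy.erase(yy.begin() + *it);       // erase the label row
        for (auto& x : X) {
            x.erase(x.begin() + *it);     // erase the same row in every feature column
        }
    }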

@@ -28,13 +28,6 @@ namespace platform {
             throw invalid_argument("Unable to open catalog file. [" + path + "/all.txt" + "]");
         }
     }
-    Dataset& Datasets::getDataset(string name)
-    {
-        if (datasets.find(name) == datasets.end()) {
-            throw invalid_argument("Dataset not found.");
-        }
-        return *datasets[name];
-    }
     vector<string> Datasets::getNames()
     {
         vector<string> result;
@@ -45,45 +38,56 @@ namespace platform {
     }
     vector<string> Datasets::getFeatures(string name)
    {
-        auto dataset = getDataset(name);
-        if (dataset.isLoaded()) {
-            return dataset.getFeatures();
+        if (datasets[name]->isLoaded()) {
+            return datasets[name]->getFeatures();
         } else {
             throw invalid_argument("Dataset not loaded.");
         }
     }
     map<string, vector<int>> Datasets::getStates(string name)
     {
-        auto dataset = getDataset(name);
-        if (dataset.isLoaded()) {
-            return dataset.getStates();
+        if (datasets[name]->isLoaded()) {
+            return datasets[name]->getStates();
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    string Datasets::getClassName(string name)
+    {
+        if (datasets[name]->isLoaded()) {
+            return datasets[name]->getClassName();
+        } else {
+            throw invalid_argument("Dataset not loaded.");
+        }
+    }
+    int Datasets::getNSamples(string name)
+    {
+        if (datasets[name]->isLoaded()) {
+            return datasets[name]->getNSamples();
         } else {
             throw invalid_argument("Dataset not loaded.");
         }
     }
     pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name)
     {
-        auto dataset = getDataset(name);
-        if (!dataset.isLoaded()) {
-            dataset.load();
+        if (!datasets[name]->isLoaded()) {
+            datasets[name]->load();
         }
-        return dataset.getVectors();
+        return datasets[name]->getVectors();
     }
     pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name)
     {
-        auto dataset = getDataset(name);
-        if (!dataset.isLoaded()) {
-            dataset.load();
+        if (!datasets[name]->isLoaded()) {
+            datasets[name]->load();
         }
-        return dataset.getVectorsDiscretized();
+        return datasets[name]->getVectorsDiscretized();
     }
     pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name)
     {
-        auto dataset = getDataset(name);
-        if (!dataset.isLoaded()) {
-            dataset.load();
+        if (!datasets[name]->isLoaded()) {
+            datasets[name]->load();
         }
-        return dataset.getTensors();
+        return datasets[name]->getTensors();
     }
     Dataset::Dataset(Dataset& dataset)
     {
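
The removal of getDataset is more than cleanup. It returned a Dataset&, but the old accessors bound that reference to auto, which deduces a value type, so every call operated on a copy (made via the Dataset::Dataset(Dataset&) copy constructor visible in the context above). A minimal sketch of the pitfall, assuming that reading of the old code:

    auto dataset = getDataset(name);   // auto = Dataset, not Dataset&: copy-constructed
    dataset.load();                    // loads the copy; datasets[name] stays unloaded

Going through datasets[name]-> directly, as the new code does, mutates the stored object.
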
@@ -195,11 +199,11 @@ namespace platform {
     void Dataset::computeStates()
     {
         for (int i = 0; i < features.size(); ++i) {
-            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()));
-            iota(Xd[i].begin(), Xd[i].end(), 0);
+            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
+            iota(begin(states[features[i]]), end(states[features[i]]), 0);
         }
-        states[className] = vector<int>(*max_element(yv.begin(), yv.end()));
-        iota(yv.begin(), yv.end(), 0);
+        states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
+        iota(begin(states[className]), end(states[className]), 0);
     }
     void Dataset::load_arff()
     {
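
Two fixes land in this hunk. First, the state vectors were one element short: a column whose maximum discretized value is m has m + 1 states (0 through m), hence the + 1. Second, iota previously ran over the data itself (Xd[i] and yv), overwriting every feature column and the labels with 0, 1, 2, ...; the replacement fills states[...] instead and leaves the data intact.
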
@@ -209,8 +213,7 @@ namespace platform {
         Xv = arff.getX();
         yv = arff.getY();
         // Get className & Features
-        auto className = arff.getClassName();
-        vector<string> features;
+        className = arff.getClassName();
         for (auto feature : arff.getAttributes()) {
             features.push_back(feature.first);
         }
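
The two deleted declarations were shadowing bugs: the local auto className and vector<string> features hid the class members of the same names, so load_arff populated locals that were thrown away and the members stayed empty. With the declarations gone, the assignments land on the members that computeStates and the accessors read.
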
@@ -246,7 +249,7 @@ namespace platform {
             } else {
                 X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
             }
-            y = torch::tensor(yv, torch::kInt32);
         }
+        y = torch::tensor(yv, torch::kInt32);
     }
 }

@@ -50,9 +50,10 @@ namespace platform {
         void load(); // Loads the list of datasets
     public:
         Datasets(string path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
-        Dataset& getDataset(string name);
         vector<string> getNames();
         vector<string> getFeatures(string name);
+        int getNSamples(string name);
+        string getClassName(string name);
         map<string, vector<int>> getStates(string name);
         pair<vector<vector<float>>&, vector<int>&> getVectors(string name);
         pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name);

@@ -79,17 +79,16 @@ namespace platform {
         file << data;
         file.close();
     }
-    Result cross_validation(Fold* fold, string model_name, torch::Tensor& X, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
+    Result cross_validation(Fold* fold, string model_name, torch::Tensor& Xt, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
     {
         auto classifiers = map<string, bayesnet::BaseClassifier*>({
             { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
             { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
             }
         );
-        auto Xt = torch::transpose(X, 0, 1);
         auto result = Result();
         auto [values, counts] = at::_unique(y);
-        result.setSamples(X.size(0)).setFeatures(X.size(1)).setClasses(values.size(0));
+        result.setSamples(Xt.size(1)).setFeatures(Xt.size(0)).setClasses(values.size(0));
         auto k = fold->getNumberOfFolds();
         auto accuracy_test = torch::zeros({ k }, torch::kFloat64);
         auto accuracy_train = torch::zeros({ k }, torch::kFloat64);
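
cross_validation now expects its tensor pre-transposed: the parameter is renamed X to Xt, the internal torch::transpose call is gone, and the bookkeeping swaps accordingly (samples = Xt.size(1), features = Xt.size(0), i.e. a features x samples layout). Callers such as main.cc below pass the result of getTensors straight through, which suggests Dataset::getTensors already builds the tensor features-first; the per-feature X.index_put_({ i, "..." }, ...) loop in the hunk above points the same way.
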
@@ -99,6 +98,7 @@ namespace platform {
         auto edges = torch::zeros({ k }, torch::kFloat64);
         auto num_states = torch::zeros({ k }, torch::kFloat64);
         Timer train_timer, test_timer;
+        cout << "doing Fold: " << flush;
         for (int i = 0; i < k; i++) {
             bayesnet::BaseClassifier* model = classifiers[model_name];
             result.setModelVersion(model->getVersion());
@@ -110,15 +110,11 @@ namespace platform {
             auto y_train = y.index({ train_t });
             auto X_test = Xt.index({ "...", test_t });
             auto y_test = y.index({ test_t });
+            cout << i + 1 << ", " << flush;
             model->fit(X_train, y_train, features, className, states);
             nodes[i] = model->getNumberOfNodes();
             edges[i] = model->getNumberOfEdges();
             num_states[i] = model->getNumberOfStates();
-            cout << "Training Fold " << i + 1 << endl;
-            cout << "X_train: " << X_train.sizes() << endl;
-            cout << "y_train: " << y_train.sizes() << endl;
-            cout << "X_test: " << X_test.sizes() << endl;
-            cout << "y_test: " << y_test.sizes() << endl;
             train_time[i] = train_timer.getDuration();
             auto accuracy_train_value = model->score(X_train, y_train);
             test_timer.start();
@@ -127,6 +123,7 @@ namespace platform {
             accuracy_train[i] = accuracy_train_value;
             accuracy_test[i] = accuracy_test_value;
         }
+        cout << "end." << endl;
         result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
         result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
         result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());

@@ -6,12 +6,12 @@
 
 
 using namespace std;
+const string PATH_RESULTS = "results";
 
 argparse::ArgumentParser manageArguments(int argc, char** argv)
 {
     argparse::ArgumentParser program("BayesNetSample");
-    program.add_argument("-d", "--dataset")
-        .help("Dataset file name");
+    program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
     program.add_argument("-p", "--path")
         .help("folder where the data files are located, default")
         .default_value(string{ PATH }
@@ -59,9 +59,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
         complete_file_name = path + file_name + ".arff";
         class_last = false;//datasets[file_name];
         title = program.get<string>("title");
-        if (!file_exists(complete_file_name)) {
-            throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
-        }
     }
     catch (const exception& err) {
         cerr << err.what() << endl;
@@ -98,26 +95,29 @@ int main(int argc, char** argv)
     experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("BayesNet");
     experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy");
     platform::Timer timer;
+    cout << "*** Starting experiment: " << title << " ***" << endl;
     timer.start();
     for (auto fileName : filesToProcess) {
-        cout << "Processing " << fileName << endl;
+        cout << "- " << fileName << " ";
         auto [X, y] = datasets.getTensors(fileName);
-        // auto states = datasets.getStates(fileName);
-        // auto features = datasets.getFeatures(fileName);
-        // auto className = datasets.getDataset(fileName).getClassName();
-        // Fold* fold;
-        // if (stratified)
-        //     fold = new StratifiedKFold(n_folds, y, seed);
-        // else
-        //     fold = new KFold(n_folds, y.numel(), seed);
-        // auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
-        // result.setDataset(file_name);
-        // experiment.setModelVersion(result.getModelVersion());
-        // experiment.addResult(result);
-        // delete fold;
+        auto states = datasets.getStates(fileName);
+        auto features = datasets.getFeatures(fileName);
+        auto samples = datasets.getNSamples(fileName);
+        auto className = datasets.getClassName(fileName);
+        cout << " (" << samples << ", " << features.size() << ") " << flush;
+        Fold* fold;
+        if (stratified)
+            fold = new StratifiedKFold(n_folds, y, seed);
+        else
+            fold = new KFold(n_folds, samples, seed);
+        auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
+        result.setDataset(file_name);
+        experiment.setModelVersion(result.getModelVersion());
+        experiment.addResult(result);
+        delete fold;
     }
     experiment.setDuration(timer.getDuration());
-    experiment.save(path);
-    experiment.show();
+    experiment.save(PATH_RESULTS);
+    cout << "Done!" << endl;
     return 0;
 }
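
One loose end in the re-enabled loop: result.setDataset(file_name) tags every result with the single-dataset variable parsed from the command line rather than the loop variable fileName, so all results presumably carry the same (possibly empty) name; fileName looks like the intended argument. Separately, fold is new-ed each iteration and delete-d at the bottom, so an exception thrown inside cross_validation would leak it. A hypothetical hardening with unique_ptr (assumes <memory>; an aside, not part of this commit):

    unique_ptr<Fold> fold = stratified
        ? unique_ptr<Fold>(new StratifiedKFold(n_folds, y, seed))
        : unique_ptr<Fold>(new KFold(n_folds, samples, seed));
    auto result = platform::cross_validation(fold.get(), model_name, X, y, features, className, states);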