Complete Experiment
This commit is contained in:
parent
bc214a496c
commit
3d8fea7a37
8
.vscode/launch.json
vendored
8
.vscode/launch.json
vendored
@ -23,15 +23,15 @@
|
||||
"name": "experiment",
|
||||
"program": "${workspaceFolder}/build/src/Platform/main",
|
||||
"args": [
|
||||
"-d",
|
||||
"iris",
|
||||
"-m",
|
||||
"TAN",
|
||||
"-p",
|
||||
"../../../data/",
|
||||
"datasets",
|
||||
"--discretize",
|
||||
"-f",
|
||||
"2"
|
||||
"5",
|
||||
"--title",
|
||||
"Debug test"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build/src/Platform",
|
||||
},
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -118,6 +119,7 @@ void ArffFiles::generateDataset(int labelIndex)
|
||||
{
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
auto yy = vector<string>(lines.size(), "");
|
||||
auto removeLines = vector<int>(); // Lines with missing values
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
@ -127,10 +129,20 @@ void ArffFiles::generateDataset(int labelIndex)
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
X[xIndex++][i] = stof(value);
|
||||
if (value == "?") {
|
||||
X[xIndex++][i] = -1;
|
||||
removeLines.push_back(i);
|
||||
} else
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto i : removeLines) {
|
||||
yy.erase(yy.begin() + i);
|
||||
for (auto& x : X) {
|
||||
x.erase(x.begin() + i);
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
|
||||
|
@ -28,13 +28,6 @@ namespace platform {
|
||||
throw invalid_argument("Unable to open catalog file. [" + path + "/all.txt" + "]");
|
||||
}
|
||||
}
|
||||
// Returns a reference to the catalog entry registered under `name`.
// Throws invalid_argument when no entry with that name exists.
Dataset& Datasets::getDataset(string name)
{
    const auto entry = datasets.find(name);
    if (entry != datasets.end()) {
        return *entry->second;
    }
    throw invalid_argument("Dataset not found.");
}
|
||||
vector<string> Datasets::getNames()
|
||||
{
|
||||
vector<string> result;
|
||||
@ -45,45 +38,56 @@ namespace platform {
|
||||
}
|
||||
// Returns the feature names of the dataset registered under `name`.
// Throws invalid_argument when the name is not in the catalog or when the
// dataset has not been loaded yet.
vector<string> Datasets::getFeatures(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    // Bind by reference so the stored dataset is queried, not a copy of it.
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        throw invalid_argument("Dataset not loaded.");
    }
    return dataset.getFeatures();
}
|
||||
// Returns the states map of the dataset registered under `name`.
// Throws invalid_argument when the name is not in the catalog or when the
// dataset has not been loaded yet.
map<string, vector<int>> Datasets::getStates(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    // Bind by reference so the stored dataset is queried, not a copy of it.
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        throw invalid_argument("Dataset not loaded.");
    }
    return dataset.getStates();
}
|
||||
// Returns the class name reported by the dataset registered under `name`.
// Throws invalid_argument when the name is not in the catalog or when the
// dataset has not been loaded yet.
string Datasets::getClassName(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        throw invalid_argument("Dataset not loaded.");
    }
    return dataset.getClassName();
}
|
||||
// Returns the sample count reported by the dataset registered under `name`.
// Throws invalid_argument when the name is not in the catalog or when the
// dataset has not been loaded yet.
int Datasets::getNSamples(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        throw invalid_argument("Dataset not loaded.");
    }
    return dataset.getNSamples();
}
|
||||
// Returns the vectors exposed by the dataset registered under `name`,
// loading the dataset first if it has not been loaded yet.
// Throws invalid_argument when the name is not in the catalog.
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    // Must be a reference: the returned pair holds references, so load() and
    // getVectors() have to act on the stored dataset, never on a local copy.
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        dataset.load();
    }
    return dataset.getVectors();
}
|
||||
// Returns the discretized vectors exposed by the dataset registered under
// `name`, loading the dataset first if it has not been loaded yet.
// Throws invalid_argument when the name is not in the catalog.
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    // Must be a reference: the returned pair holds references, so load() and
    // getVectorsDiscretized() have to act on the stored dataset, never on a
    // local copy.
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        dataset.load();
    }
    return dataset.getVectorsDiscretized();
}
|
||||
// Returns the tensors exposed by the dataset registered under `name`,
// loading the dataset first if it has not been loaded yet.
// Throws invalid_argument when the name is not in the catalog.
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name)
{
    // Look the entry up with find(): operator[] would insert an empty entry
    // for an unknown name and the dereference below would then be invalid.
    auto it = datasets.find(name);
    if (it == datasets.end() || !it->second) {
        throw invalid_argument("Dataset not found.");
    }
    // Must be a reference: the returned pair holds references, so load() and
    // getTensors() have to act on the stored dataset, never on a local copy.
    auto& dataset = *it->second;
    if (!dataset.isLoaded()) {
        dataset.load();
    }
    return dataset.getTensors();
}
|
||||
Dataset::Dataset(Dataset& dataset)
|
||||
{
|
||||
@ -195,11 +199,11 @@ namespace platform {
|
||||
void Dataset::computeStates()
|
||||
{
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()));
|
||||
iota(Xd[i].begin(), Xd[i].end(), 0);
|
||||
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
|
||||
iota(begin(states[features[i]]), end(states[features[i]]), 0);
|
||||
}
|
||||
states[className] = vector<int>(*max_element(yv.begin(), yv.end()));
|
||||
iota(yv.begin(), yv.end(), 0);
|
||||
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
|
||||
iota(begin(states[className]), end(states[className]), 0);
|
||||
}
|
||||
void Dataset::load_arff()
|
||||
{
|
||||
@ -209,8 +213,7 @@ namespace platform {
|
||||
Xv = arff.getX();
|
||||
yv = arff.getY();
|
||||
// Get className & Features
|
||||
auto className = arff.getClassName();
|
||||
vector<string> features;
|
||||
className = arff.getClassName();
|
||||
for (auto feature : arff.getAttributes()) {
|
||||
features.push_back(feature.first);
|
||||
}
|
||||
@ -246,7 +249,7 @@ namespace platform {
|
||||
} else {
|
||||
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
|
||||
}
|
||||
y = torch::tensor(yv, torch::kInt32);
|
||||
}
|
||||
y = torch::tensor(yv, torch::kInt32);
|
||||
}
|
||||
}
|
@ -50,9 +50,10 @@ namespace platform {
|
||||
void load(); // Loads the list of datasets
|
||||
public:
|
||||
Datasets(string path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
|
||||
Dataset& getDataset(string name);
|
||||
vector<string> getNames();
|
||||
vector<string> getFeatures(string name);
|
||||
int getNSamples(string name);
|
||||
string getClassName(string name);
|
||||
map<string, vector<int>> getStates(string name);
|
||||
pair<vector<vector<float>>&, vector<int>&> getVectors(string name);
|
||||
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name);
|
||||
|
@ -79,17 +79,16 @@ namespace platform {
|
||||
file << data;
|
||||
file.close();
|
||||
}
|
||||
Result cross_validation(Fold* fold, string model_name, torch::Tensor& X, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
|
||||
Result cross_validation(Fold* fold, string model_name, torch::Tensor& Xt, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
|
||||
{
|
||||
auto classifiers = map<string, bayesnet::BaseClassifier*>({
|
||||
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
|
||||
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
|
||||
}
|
||||
);
|
||||
auto Xt = torch::transpose(X, 0, 1);
|
||||
auto result = Result();
|
||||
auto [values, counts] = at::_unique(y);
|
||||
result.setSamples(X.size(0)).setFeatures(X.size(1)).setClasses(values.size(0));
|
||||
result.setSamples(Xt.size(1)).setFeatures(Xt.size(0)).setClasses(values.size(0));
|
||||
auto k = fold->getNumberOfFolds();
|
||||
auto accuracy_test = torch::zeros({ k }, torch::kFloat64);
|
||||
auto accuracy_train = torch::zeros({ k }, torch::kFloat64);
|
||||
@ -99,6 +98,7 @@ namespace platform {
|
||||
auto edges = torch::zeros({ k }, torch::kFloat64);
|
||||
auto num_states = torch::zeros({ k }, torch::kFloat64);
|
||||
Timer train_timer, test_timer;
|
||||
cout << "doing Fold: " << flush;
|
||||
for (int i = 0; i < k; i++) {
|
||||
bayesnet::BaseClassifier* model = classifiers[model_name];
|
||||
result.setModelVersion(model->getVersion());
|
||||
@ -110,15 +110,11 @@ namespace platform {
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = Xt.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
cout << i + 1 << ", " << flush;
|
||||
model->fit(X_train, y_train, features, className, states);
|
||||
nodes[i] = model->getNumberOfNodes();
|
||||
edges[i] = model->getNumberOfEdges();
|
||||
num_states[i] = model->getNumberOfStates();
|
||||
cout << "Training Fold " << i + 1 << endl;
|
||||
cout << "X_train: " << X_train.sizes() << endl;
|
||||
cout << "y_train: " << y_train.sizes() << endl;
|
||||
cout << "X_test: " << X_test.sizes() << endl;
|
||||
cout << "y_test: " << y_test.sizes() << endl;
|
||||
train_time[i] = train_timer.getDuration();
|
||||
auto accuracy_train_value = model->score(X_train, y_train);
|
||||
test_timer.start();
|
||||
@ -127,6 +123,7 @@ namespace platform {
|
||||
accuracy_train[i] = accuracy_train_value;
|
||||
accuracy_test[i] = accuracy_test_value;
|
||||
}
|
||||
cout << "end." << endl;
|
||||
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
||||
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
||||
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||
|
@ -6,12 +6,12 @@
|
||||
|
||||
|
||||
using namespace std;
|
||||
const string PATH_RESULTS = "results";
|
||||
|
||||
argparse::ArgumentParser manageArguments(int argc, char** argv)
|
||||
{
|
||||
argparse::ArgumentParser program("BayesNetSample");
|
||||
program.add_argument("-d", "--dataset")
|
||||
.help("Dataset file name");
|
||||
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
|
||||
program.add_argument("-p", "--path")
|
||||
.help("folder where the data files are located, default")
|
||||
.default_value(string{ PATH }
|
||||
@ -59,9 +59,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
|
||||
complete_file_name = path + file_name + ".arff";
|
||||
class_last = false;//datasets[file_name];
|
||||
title = program.get<string>("title");
|
||||
if (!file_exists(complete_file_name)) {
|
||||
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
||||
}
|
||||
}
|
||||
catch (const exception& err) {
|
||||
cerr << err.what() << endl;
|
||||
@ -98,26 +95,29 @@ int main(int argc, char** argv)
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("BayesNet");
|
||||
experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy");
|
||||
platform::Timer timer;
|
||||
cout << "*** Starting experiment: " << title << " ***" << endl;
|
||||
timer.start();
|
||||
for (auto fileName : filesToProcess) {
|
||||
cout << "Processing " << fileName << endl;
|
||||
cout << "- " << fileName << " ";
|
||||
auto [X, y] = datasets.getTensors(fileName);
|
||||
// auto states = datasets.getStates(fileName);
|
||||
// auto features = datasets.getFeatures(fileName);
|
||||
// auto className = datasets.getDataset(fileName).getClassName();
|
||||
// Fold* fold;
|
||||
// if (stratified)
|
||||
// fold = new StratifiedKFold(n_folds, y, seed);
|
||||
// else
|
||||
// fold = new KFold(n_folds, y.numel(), seed);
|
||||
// auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
|
||||
// result.setDataset(file_name);
|
||||
// experiment.setModelVersion(result.getModelVersion());
|
||||
// experiment.addResult(result);
|
||||
// delete fold;
|
||||
auto states = datasets.getStates(fileName);
|
||||
auto features = datasets.getFeatures(fileName);
|
||||
auto samples = datasets.getNSamples(fileName);
|
||||
auto className = datasets.getClassName(fileName);
|
||||
cout << " (" << samples << ", " << features.size() << ") " << flush;
|
||||
Fold* fold;
|
||||
if (stratified)
|
||||
fold = new StratifiedKFold(n_folds, y, seed);
|
||||
else
|
||||
fold = new KFold(n_folds, samples, seed);
|
||||
auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
|
||||
result.setDataset(file_name);
|
||||
experiment.setModelVersion(result.getModelVersion());
|
||||
experiment.addResult(result);
|
||||
delete fold;
|
||||
}
|
||||
experiment.setDuration(timer.getDuration());
|
||||
experiment.save(path);
|
||||
experiment.show();
|
||||
experiment.save(PATH_RESULTS);
|
||||
cout << "Done!" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user