Refactor cross_validation into an Experiment method that iterates over seeds and folds

This commit is contained in:
Ricardo Montañana Gómez 2023-07-29 16:44:07 +02:00
parent b9e76becce
commit adc0ca238f
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
3 changed files with 55 additions and 49 deletions

View File

@ -88,7 +88,7 @@ namespace platform {
json data = build_json(); json data = build_json();
cout << data.dump(4) << endl; cout << data.dump(4) << endl;
} }
Result cross_validation(Fold* fold, string model_name, torch::Tensor& Xt, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states) Result Experiment::cross_validation(string model_name, torch::Tensor& Xt, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
{ {
auto classifiers = map<string, bayesnet::BaseClassifier*>({ auto classifiers = map<string, bayesnet::BaseClassifier*>({
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
@ -98,41 +98,57 @@ namespace platform {
auto result = Result(); auto result = Result();
auto [values, counts] = at::_unique(y); auto [values, counts] = at::_unique(y);
result.setSamples(Xt.size(1)).setFeatures(Xt.size(0)).setClasses(values.size(0)); result.setSamples(Xt.size(1)).setFeatures(Xt.size(0)).setClasses(values.size(0));
auto k = fold->getNumberOfFolds(); int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ k }, torch::kFloat64); auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ k }, torch::kFloat64); auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
auto train_time = torch::zeros({ k }, torch::kFloat64); auto train_time = torch::zeros({ nResults }, torch::kFloat64);
auto test_time = torch::zeros({ k }, torch::kFloat64); auto test_time = torch::zeros({ nResults }, torch::kFloat64);
auto nodes = torch::zeros({ k }, torch::kFloat64); auto nodes = torch::zeros({ nResults }, torch::kFloat64);
auto edges = torch::zeros({ k }, torch::kFloat64); auto edges = torch::zeros({ nResults }, torch::kFloat64);
auto num_states = torch::zeros({ k }, torch::kFloat64); auto num_states = torch::zeros({ nResults }, torch::kFloat64);
Timer train_timer, test_timer; Timer train_timer, test_timer;
cout << "doing Fold: " << flush; int item = 0;
for (int i = 0; i < k; i++) { for (auto seed : randomSeeds) {
bayesnet::BaseClassifier* model = classifiers[model_name]; cout << "(" << seed << ") " << flush;
train_timer.start(); Fold* fold;
auto [train, test] = fold->getFold(i); if (stratified)
auto train_t = torch::tensor(train); fold = new StratifiedKFold(nfolds, y, seed);
auto test_t = torch::tensor(test); else
auto X_train = Xt.index({ "...", train_t }); fold = new KFold(nfolds, y.size(0), seed);
auto y_train = y.index({ train_t }); cout << "doing Fold: " << flush;
auto X_test = Xt.index({ "...", test_t }); for (int nfold = 0; nfold < nfolds; nfold++) {
auto y_test = y.index({ test_t }); bayesnet::BaseClassifier* clf = classifiers[model];
cout << i + 1 << ", " << flush; setModelVersion(clf->getVersion());
model->fit(X_train, y_train, features, className, states); train_timer.start();
nodes[i] = model->getNumberOfNodes(); auto [train, test] = fold->getFold(nfold);
edges[i] = model->getNumberOfEdges(); auto train_t = torch::tensor(train);
num_states[i] = model->getNumberOfStates(); auto test_t = torch::tensor(test);
train_time[i] = train_timer.getDuration(); auto X_train = Xt.index({ "...", train_t });
auto accuracy_train_value = model->score(X_train, y_train); auto y_train = y.index({ train_t });
test_timer.start(); auto X_test = Xt.index({ "...", test_t });
auto accuracy_test_value = model->score(X_test, y_test); auto y_test = y.index({ test_t });
test_time[i] = test_timer.getDuration(); cout << nfold + 1 << ", " << flush;
accuracy_train[i] = accuracy_train_value; clf->fit(X_train, y_train, features, className, states);
accuracy_test[i] = accuracy_test_value; nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
auto accuracy_train_value = clf->score(X_train, y_train);
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
// Store results and times in vector
result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
}
cout << "end. " << flush;
delete fold;
} }
cout << "end. " << flush;
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>()); result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>()); result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>()); result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());

View File

@ -105,9 +105,9 @@ namespace platform {
Experiment& setDuration(float duration) { this->duration = duration; return *this; } Experiment& setDuration(float duration) { this->duration = duration; return *this; }
string get_file_name(); string get_file_name();
void save(string path); void save(string path);
Result cross_validation(const string& path, const string& fileName); //Result cross_validation(const string& path, const string& fileName);
Result cross_validation(string model_name, torch::Tensor& X, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states);
void show(); void show();
}; };
Result cross_validation(Fold* fold, string model_name, torch::Tensor& X, torch::Tensor& y, vector<string> features, string className, map<string, vector<int>> states);
} }
#endif #endif

View File

@ -126,19 +126,9 @@ int main(int argc, char** argv)
auto samples = datasets.getNSamples(fileName); auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName); auto className = datasets.getClassName(fileName);
cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
for (auto seed : seeds) { auto result = experiment.cross_validation(model_name, X, y, features, className, states);
cout << "(" << seed << ") " << flush; result.setDataset(fileName);
Fold* fold; experiment.addResult(result);
if (stratified)
fold = new StratifiedKFold(n_folds, y, seed);
else
fold = new KFold(n_folds, samples, seed);
auto result = platform::cross_validation(fold, model_name, X, y, features, className, states);
result.setDataset(fileName);
experiment.setModelVersion("-FIXME-");
experiment.addResult(result);
delete fold;
}
cout << endl; cout << endl;
} }
experiment.setDuration(timer.getDuration()); experiment.setDuration(timer.getDuration());