Begin gridsearch implementation

This commit is contained in:
Ricardo Montañana Gómez 2023-11-22 12:22:30 +01:00
parent b657762c0c
commit fb347ed5b9
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
7 changed files with 110 additions and 106 deletions

View File

@ -9,7 +9,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
include_directories(${Python3_INCLUDE_DIRS})
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc Folding.cc Datasets.cc Dataset.cc)
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc Datasets.cc Dataset.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)

View File

@ -133,7 +133,7 @@ namespace platform {
}
void Experiment::cross_validation(const std::string& fileName, bool quiet)
{
auto datasets = platform::Datasets(discretized, Paths::datasets());
auto datasets = Datasets(discretized, Paths::datasets());
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);

View File

@ -30,37 +30,41 @@ namespace platform {
int GridData::computeNumCombinations(const json& line)
{
int numCombinations = 1;
for (const auto& item : line) {
for (const auto& hyperparam : item.items()) {
numCombinations *= item.size();
}
for (const auto& item : line.items()) {
numCombinations *= item.value().size();
}
return numCombinations;
}
std::vector<json> GridData::doCombination(const std::string& model)
int GridData::getNumCombinations(const std::string& model)
{
int numTotal = 0;
for (const auto& item : grid[model]) {
numTotal += computeNumCombinations(item);
int numCombinations = 0;
for (const auto& line : grid.at(model)) {
numCombinations += computeNumCombinations(line);
}
auto result = std::vector<json>(numTotal);
int base = 0;
for (const auto& item : grid[model]) {
int numCombinations = computeNumCombinations(item);
int line = 0;
for (const auto& hyperparam : item.items()) {
int numValues = hyperparam.value().size();
for (const auto& value : hyperparam.value()) {
for (int i = 0; i < numCombinations / numValues; i++) {
result[base + line++][hyperparam.key()] = value;
//std::cout << "line=" << base + line << " " << hyperparam.key() << "=" << value << std::endl;
}
}
}
base += numCombinations;
return numCombinations;
}
json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
{
if (index == last) {
// If we reached the end of input, store the current combination
output.push_back(currentCombination);
return currentCombination;
}
for (const auto& item : result) {
std::cout << item.dump() << std::endl;
const auto& key = index.key();
const auto& values = index.value();
for (const auto& value : values) {
auto combination = currentCombination;
combination[key] = value;
json::iterator nextIndex = index;
generateCombinations(++nextIndex, last, output, combination);
}
return currentCombination;
}
std::vector<json> GridData::getGrid(const std::string& model)
{
auto result = std::vector<json>();
for (json line : grid.at(model)) {
generateCombinations(line.begin(), line.end(), result, json({}));
}
return result;
}

View File

@ -11,10 +11,11 @@ namespace platform {
public:
GridData();
~GridData() = default;
std::vector<json> getGrid(const std::string& model) { return doCombination(model); }
std::vector<json> getGrid(const std::string& model);
int getNumCombinations(const std::string& model);
private:
json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination);
int computeNumCombinations(const json& line);
std::vector<json> doCombination(const std::string& model);
std::map<std::string, json> grid;
};
} /* namespace platform */

View File

@ -1,38 +1,91 @@
#include <iostream>
#include <torch/torch.h>
#include "GridSearch.h"
#include "Models.h"
#include "Paths.h"
#include "Datasets.h"
#include "HyperParameters.h"
#include "Folding.h"
#include "Colors.h"
namespace platform {
GridSearch::GridSearch(struct ConfigGrid& config) : config(config)
{
this->config.output_file = config.path + "grid_" + config.model + "_output.json";
}
void showProgress(int fold, const std::string& color, const std::string& phase)
{
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
std::string getColor(bayesnet::status_t status)
{
switch (status) {
case bayesnet::NORMAL:
return Colors::GREEN();
case bayesnet::WARNING:
return Colors::YELLOW();
case bayesnet::ERROR:
return Colors::RED();
default:
return Colors::RESET();
}
}
void GridSearch::processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters)
{
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
auto features = datasets.getFeatures(fileName);
auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName);
std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
for (const auto& seed : config.seeds) {
std::cout << "(" << seed << ") doing Fold: " << flush;
Fold* fold;
if (config.stratified)
fold = new StratifiedKFold(config.n_folds, y, seed);
else
fold = new KFold(config.n_folds, y.size(0), seed);
for (int nfold = 0; nfold < config.n_folds; nfold++) {
auto clf = Models::instance()->create(config.model);
auto [train, test] = fold->getFold(nfold);
// auto train_t = torch::tensor(train);
// auto test_t = torch::tensor(test);
// auto X_train = X.index({ "...", train_t });
// auto y_train = y.index({ train_t });
// auto X_test = X.index({ "...", test_t });
// auto y_test = y.index({ test_t });
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model
// clf->fit(X_train, y_train, features, className, states);
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
}
delete fold;
}
}
void GridSearch::go()
{
// Load datasets
auto datasets = platform::Datasets(config.discretize, Paths::datasets());
int i = 0;
for (const auto& item : grid.getGrid("BoostAODE")) {
std::cout << i++ << " hyperparams: " << item.dump() << std::endl;
auto datasets = Datasets(config.discretize, Paths::datasets());
// Create model
std::cout << "***************** Starting Gridsearch *****************" << std::endl;
std::cout << "* Doing " << grid.getNumCombinations(config.model) << " combinations for each dataset/seed/fold" << std::endl;
// Generate hyperparameters grid & run gridsearch
// Check each combination of hyperparameters for each dataset and each seed
for (const auto& dataset : datasets.getNames()) {
std::cout << "- " << setw(20) << left << dataset << " " << right << flush;
for (const auto& hyperparam_line : grid.getGrid(config.model)) {
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
processFile(dataset, datasets, hyperparameters);
}
std::cout << std::endl;
}
// Load hyperparameters
// auto hyperparameters = platform::HyperParameters(datasets.getNames(), config.input_file);
// Check if hyperparameters are valid
// auto valid_hyperparameters = platform::Models::instance()->getHyperparameters(config.model);
// hyperparameters.check(valid_hyperparameters, config.model);
// // Load model
// auto model = platform::Models::instance()->get(config.model);
// // Run gridsearch
// auto grid = platform::Grid(datasets, hyperparameters, model, config.score, config.discretize, config.stratified, config.n_folds, config.seeds);
// grid.run();
// // Save results
// grid.save(config.output_file);
// Save results
save();
}
void GridSearch::save()
{
std::ofstream file(config.output_file);
// file << results.dump(4);
file.close();
}
} /* namespace platform */

View File

@ -2,6 +2,8 @@
#define GRIDSEARCH_H
#include <string>
#include <vector>
#include "Datasets.h"
#include "HyperParameters.h"
#include "GridData.h"
namespace platform {
@ -23,6 +25,7 @@ namespace platform {
void save();
~GridSearch() = default;
private:
void processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters);
struct ConfigGrid config;
GridData grid;
};

View File

@ -1,57 +0,0 @@
#include <iostream>
#include <vector>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
{
if (index == last) {
// If we reached the end of input, store the current combination
output.push_back(currentCombination);
return currentCombination;
}
const auto& key = index.key();
const auto& values = index.value();
for (const auto& value : values) {
auto combination = currentCombination;
combination[key] = value;
json::iterator nextIndex = index;
generateCombinations(++nextIndex, last, output, combination);
}
return currentCombination;
}
int main()
{
json input = R"(
[
{
"convergence": [true, false],
"ascending": [true, false],
"repeatSparent": [true, false],
"select_features": ["CFS", "FCBF"],
"tolerance": [0, 3, 5],
"threshold": [1e-7]
},
{
"convergence": [true, false],
"ascending": [true, false],
"repeatSparent": [true, false],
"select_features": ["IWSS"],
"tolerance": [0, 3, 5],
"threshold": [0.5]
}
]
)"_json;
auto output = std::vector<json>();
for (json line : input) {
generateCombinations(line.begin(), line.end(), output, json({}));
}
// Print the generated combinations
int i = 0;
for (const auto& item : output) {
std::cout << i++ << " " << item.dump() << std::endl;
}
return 0;
}