Begin gridsearch implementation
This commit is contained in:
parent
b657762c0c
commit
fb347ed5b9
@ -9,7 +9,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
|
|||||||
include_directories(${Python3_INCLUDE_DIRS})
|
include_directories(${Python3_INCLUDE_DIRS})
|
||||||
|
|
||||||
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
|
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
|
||||||
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc Folding.cc Datasets.cc Dataset.cc)
|
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc Datasets.cc Dataset.cc)
|
||||||
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
|
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
|
||||||
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
|
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
|
||||||
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
|
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
|
||||||
|
@ -133,7 +133,7 @@ namespace platform {
|
|||||||
}
|
}
|
||||||
void Experiment::cross_validation(const std::string& fileName, bool quiet)
|
void Experiment::cross_validation(const std::string& fileName, bool quiet)
|
||||||
{
|
{
|
||||||
auto datasets = platform::Datasets(discretized, Paths::datasets());
|
auto datasets = Datasets(discretized, Paths::datasets());
|
||||||
// Get dataset
|
// Get dataset
|
||||||
auto [X, y] = datasets.getTensors(fileName);
|
auto [X, y] = datasets.getTensors(fileName);
|
||||||
auto states = datasets.getStates(fileName);
|
auto states = datasets.getStates(fileName);
|
||||||
|
@ -30,37 +30,41 @@ namespace platform {
|
|||||||
int GridData::computeNumCombinations(const json& line)
|
int GridData::computeNumCombinations(const json& line)
|
||||||
{
|
{
|
||||||
int numCombinations = 1;
|
int numCombinations = 1;
|
||||||
for (const auto& item : line) {
|
for (const auto& item : line.items()) {
|
||||||
for (const auto& hyperparam : item.items()) {
|
numCombinations *= item.value().size();
|
||||||
numCombinations *= item.size();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return numCombinations;
|
return numCombinations;
|
||||||
}
|
}
|
||||||
std::vector<json> GridData::doCombination(const std::string& model)
|
int GridData::getNumCombinations(const std::string& model)
|
||||||
{
|
{
|
||||||
int numTotal = 0;
|
int numCombinations = 0;
|
||||||
for (const auto& item : grid[model]) {
|
for (const auto& line : grid.at(model)) {
|
||||||
numTotal += computeNumCombinations(item);
|
numCombinations += computeNumCombinations(line);
|
||||||
}
|
}
|
||||||
auto result = std::vector<json>(numTotal);
|
return numCombinations;
|
||||||
int base = 0;
|
}
|
||||||
for (const auto& item : grid[model]) {
|
json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
|
||||||
int numCombinations = computeNumCombinations(item);
|
{
|
||||||
int line = 0;
|
if (index == last) {
|
||||||
for (const auto& hyperparam : item.items()) {
|
// If we reached the end of input, store the current combination
|
||||||
int numValues = hyperparam.value().size();
|
output.push_back(currentCombination);
|
||||||
for (const auto& value : hyperparam.value()) {
|
return currentCombination;
|
||||||
for (int i = 0; i < numCombinations / numValues; i++) {
|
|
||||||
result[base + line++][hyperparam.key()] = value;
|
|
||||||
//std::cout << "line=" << base + line << " " << hyperparam.key() << "=" << value << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
base += numCombinations;
|
|
||||||
}
|
}
|
||||||
for (const auto& item : result) {
|
const auto& key = index.key();
|
||||||
std::cout << item.dump() << std::endl;
|
const auto& values = index.value();
|
||||||
|
for (const auto& value : values) {
|
||||||
|
auto combination = currentCombination;
|
||||||
|
combination[key] = value;
|
||||||
|
json::iterator nextIndex = index;
|
||||||
|
generateCombinations(++nextIndex, last, output, combination);
|
||||||
|
}
|
||||||
|
return currentCombination;
|
||||||
|
}
|
||||||
|
std::vector<json> GridData::getGrid(const std::string& model)
|
||||||
|
{
|
||||||
|
auto result = std::vector<json>();
|
||||||
|
for (json line : grid.at(model)) {
|
||||||
|
generateCombinations(line.begin(), line.end(), result, json({}));
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,11 @@ namespace platform {
|
|||||||
public:
|
public:
|
||||||
GridData();
|
GridData();
|
||||||
~GridData() = default;
|
~GridData() = default;
|
||||||
std::vector<json> getGrid(const std::string& model) { return doCombination(model); }
|
std::vector<json> getGrid(const std::string& model);
|
||||||
|
int getNumCombinations(const std::string& model);
|
||||||
private:
|
private:
|
||||||
|
json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination);
|
||||||
int computeNumCombinations(const json& line);
|
int computeNumCombinations(const json& line);
|
||||||
std::vector<json> doCombination(const std::string& model);
|
|
||||||
std::map<std::string, json> grid;
|
std::map<std::string, json> grid;
|
||||||
};
|
};
|
||||||
} /* namespace platform */
|
} /* namespace platform */
|
||||||
|
@ -1,38 +1,91 @@
|
|||||||
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <torch/torch.h>
#include "GridSearch.h"
#include "Models.h"
#include "Paths.h"
#include "Datasets.h"
#include "Folding.h"
#include "HyperParameters.h"
#include "Colors.h"
|
||||||
|
|
||||||
namespace platform {
|
namespace platform {
|
||||||
GridSearch::GridSearch(struct ConfigGrid& config) : config(config)
|
GridSearch::GridSearch(struct ConfigGrid& config) : config(config)
|
||||||
{
|
{
|
||||||
this->config.output_file = config.path + "grid_" + config.model + "_output.json";
|
this->config.output_file = config.path + "grid_" + config.model + "_output.json";
|
||||||
}
|
}
|
||||||
|
void showProgress(int fold, const std::string& color, const std::string& phase)
|
||||||
|
{
|
||||||
|
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
|
||||||
|
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
|
||||||
|
}
|
||||||
|
std::string getColor(bayesnet::status_t status)
|
||||||
|
{
|
||||||
|
switch (status) {
|
||||||
|
case bayesnet::NORMAL:
|
||||||
|
return Colors::GREEN();
|
||||||
|
case bayesnet::WARNING:
|
||||||
|
return Colors::YELLOW();
|
||||||
|
case bayesnet::ERROR:
|
||||||
|
return Colors::RED();
|
||||||
|
default:
|
||||||
|
return Colors::RESET();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void GridSearch::processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters)
|
||||||
|
{
|
||||||
|
// Get dataset
|
||||||
|
auto [X, y] = datasets.getTensors(fileName);
|
||||||
|
auto states = datasets.getStates(fileName);
|
||||||
|
auto features = datasets.getFeatures(fileName);
|
||||||
|
auto samples = datasets.getNSamples(fileName);
|
||||||
|
auto className = datasets.getClassName(fileName);
|
||||||
|
std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
|
||||||
|
for (const auto& seed : config.seeds) {
|
||||||
|
std::cout << "(" << seed << ") doing Fold: " << flush;
|
||||||
|
Fold* fold;
|
||||||
|
if (config.stratified)
|
||||||
|
fold = new StratifiedKFold(config.n_folds, y, seed);
|
||||||
|
else
|
||||||
|
fold = new KFold(config.n_folds, y.size(0), seed);
|
||||||
|
for (int nfold = 0; nfold < config.n_folds; nfold++) {
|
||||||
|
auto clf = Models::instance()->create(config.model);
|
||||||
|
auto [train, test] = fold->getFold(nfold);
|
||||||
|
// auto train_t = torch::tensor(train);
|
||||||
|
// auto test_t = torch::tensor(test);
|
||||||
|
// auto X_train = X.index({ "...", train_t });
|
||||||
|
// auto y_train = y.index({ train_t });
|
||||||
|
// auto X_test = X.index({ "...", test_t });
|
||||||
|
// auto y_test = y.index({ test_t });
|
||||||
|
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
|
||||||
|
// Train model
|
||||||
|
// clf->fit(X_train, y_train, features, className, states);
|
||||||
|
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
|
||||||
|
}
|
||||||
|
delete fold;
|
||||||
|
}
|
||||||
|
}
|
||||||
void GridSearch::go()
|
void GridSearch::go()
|
||||||
{
|
{
|
||||||
// Load datasets
|
// Load datasets
|
||||||
auto datasets = platform::Datasets(config.discretize, Paths::datasets());
|
auto datasets = Datasets(config.discretize, Paths::datasets());
|
||||||
int i = 0;
|
// Create model
|
||||||
for (const auto& item : grid.getGrid("BoostAODE")) {
|
std::cout << "***************** Starting Gridsearch *****************" << std::endl;
|
||||||
std::cout << i++ << " hyperparams: " << item.dump() << std::endl;
|
std::cout << "* Doing " << grid.getNumCombinations(config.model) << " combinations for each dataset/seed/fold" << std::endl;
|
||||||
|
// Generate hyperparameters grid & run gridsearch
|
||||||
|
// Check each combination of hyperparameters for each dataset and each seed
|
||||||
|
for (const auto& dataset : datasets.getNames()) {
|
||||||
|
std::cout << "- " << setw(20) << left << dataset << " " << right << flush;
|
||||||
|
for (const auto& hyperparam_line : grid.getGrid(config.model)) {
|
||||||
|
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
|
||||||
|
processFile(dataset, datasets, hyperparameters);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
// Load hyperparameters
|
// Save results
|
||||||
// auto hyperparameters = platform::HyperParameters(datasets.getNames(), config.input_file);
|
save();
|
||||||
// Check if hyperparameters are valid
|
|
||||||
// auto valid_hyperparameters = platform::Models::instance()->getHyperparameters(config.model);
|
|
||||||
// hyperparameters.check(valid_hyperparameters, config.model);
|
|
||||||
// // Load model
|
|
||||||
// auto model = platform::Models::instance()->get(config.model);
|
|
||||||
// // Run gridsearch
|
|
||||||
// auto grid = platform::Grid(datasets, hyperparameters, model, config.score, config.discretize, config.stratified, config.n_folds, config.seeds);
|
|
||||||
// grid.run();
|
|
||||||
// // Save results
|
|
||||||
// grid.save(config.output_file);
|
|
||||||
}
|
}
|
||||||
void GridSearch::save()
|
void GridSearch::save()
|
||||||
{
|
{
|
||||||
|
std::ofstream file(config.output_file);
|
||||||
|
// file << results.dump(4);
|
||||||
|
file.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
} /* namespace platform */
|
} /* namespace platform */
|
@ -2,6 +2,8 @@
|
|||||||
#define GRIDSEARCH_H
|
#define GRIDSEARCH_H
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "Datasets.h"
|
||||||
|
#include "HyperParameters.h"
|
||||||
#include "GridData.h"
|
#include "GridData.h"
|
||||||
|
|
||||||
namespace platform {
|
namespace platform {
|
||||||
@ -23,6 +25,7 @@ namespace platform {
|
|||||||
void save();
|
void save();
|
||||||
~GridSearch() = default;
|
~GridSearch() = default;
|
||||||
private:
|
private:
|
||||||
|
void processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters);
|
||||||
struct ConfigGrid config;
|
struct ConfigGrid config;
|
||||||
GridData grid;
|
GridData grid;
|
||||||
};
|
};
|
||||||
|
@ -1,57 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include <nlohmann/json.hpp>
|
|
||||||
|
|
||||||
using json = nlohmann::json;
|
|
||||||
|
|
||||||
json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
|
|
||||||
{
|
|
||||||
if (index == last) {
|
|
||||||
// If we reached the end of input, store the current combination
|
|
||||||
output.push_back(currentCombination);
|
|
||||||
return currentCombination;
|
|
||||||
}
|
|
||||||
const auto& key = index.key();
|
|
||||||
const auto& values = index.value();
|
|
||||||
for (const auto& value : values) {
|
|
||||||
auto combination = currentCombination;
|
|
||||||
combination[key] = value;
|
|
||||||
json::iterator nextIndex = index;
|
|
||||||
generateCombinations(++nextIndex, last, output, combination);
|
|
||||||
}
|
|
||||||
return currentCombination;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main()
|
|
||||||
{
|
|
||||||
json input = R"(
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"convergence": [true, false],
|
|
||||||
"ascending": [true, false],
|
|
||||||
"repeatSparent": [true, false],
|
|
||||||
"select_features": ["CFS", "FCBF"],
|
|
||||||
"tolerance": [0, 3, 5],
|
|
||||||
"threshold": [1e-7]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"convergence": [true, false],
|
|
||||||
"ascending": [true, false],
|
|
||||||
"repeatSparent": [true, false],
|
|
||||||
"select_features": ["IWSS"],
|
|
||||||
"tolerance": [0, 3, 5],
|
|
||||||
"threshold": [0.5]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
)"_json;
|
|
||||||
auto output = std::vector<json>();
|
|
||||||
for (json line : input) {
|
|
||||||
generateCombinations(line.begin(), line.end(), output, json({}));
|
|
||||||
}
|
|
||||||
// Print the generated combinations
|
|
||||||
int i = 0;
|
|
||||||
for (const auto& item : output) {
|
|
||||||
std::cout << i++ << " " << item.dump() << std::endl;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user