Add Graphs to results
Add bin5..bin10 q & u discretizer algorithms. Fix trouble in computing states. Update mdlp to 2.0.0.
This commit is contained in:
@@ -21,7 +21,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
|
||||
# Options
|
||||
|
Submodule lib/libxlsxwriter updated: f483e65f2e...cf887d65ce
2
lib/mdlp
2
lib/mdlp
Submodule lib/mdlp updated: c4e6c041fe...2db60e007d
@@ -2,7 +2,7 @@ include_directories(
|
||||
## Libs
|
||||
${Platform_SOURCE_DIR}/lib/Files
|
||||
${Platform_SOURCE_DIR}/lib/folding
|
||||
${Platform_SOURCE_DIR}/lib/mdlp
|
||||
${Platform_SOURCE_DIR}/lib/mdlp/src
|
||||
${Platform_SOURCE_DIR}/lib/argparse/include
|
||||
${Platform_SOURCE_DIR}/lib/json/include
|
||||
${Platform_SOURCE_DIR}/lib/libxlsxwriter/include
|
||||
|
@@ -59,6 +59,7 @@ void manageArguments(argparse::ArgumentParser& program)
|
||||
smooth_arg.choices(choice);
|
||||
}
|
||||
program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true);
|
||||
program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true);
|
||||
program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
|
||||
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
|
||||
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
|
||||
@@ -87,7 +88,7 @@ int main(int argc, char** argv)
|
||||
manageArguments(program);
|
||||
std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat;
|
||||
json hyperparameters_json;
|
||||
bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files;
|
||||
bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph;
|
||||
std::vector<int> seeds;
|
||||
std::vector<std::string> file_names;
|
||||
std::vector<std::string> filesToTest;
|
||||
@@ -103,6 +104,7 @@ int main(int argc, char** argv)
|
||||
smooth_strat = program.get<std::string>("smooth-strat");
|
||||
stratified = program.get<bool>("stratified");
|
||||
quiet = program.get<bool>("quiet");
|
||||
graph = program.get<bool>("graph");
|
||||
n_folds = program.get<int>("folds");
|
||||
seeds = program.get<std::vector<int>>("seeds");
|
||||
auto hyperparameters = program.get<std::string>("hyperparameters");
|
||||
@@ -200,7 +202,7 @@ int main(int argc, char** argv)
|
||||
}
|
||||
platform::Timer timer;
|
||||
timer.start();
|
||||
experiment.go(filesToTest, quiet, no_train_score, generate_fold_files);
|
||||
experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph);
|
||||
experiment.setDuration(timer.getDuration());
|
||||
if (!quiet) {
|
||||
// Classification report if only one dataset is tested
|
||||
@@ -209,6 +211,9 @@ int main(int argc, char** argv)
|
||||
if (saveResults) {
|
||||
experiment.saveResult();
|
||||
}
|
||||
if (graph) {
|
||||
experiment.saveGraph();
|
||||
}
|
||||
std::cout << "Done!" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -131,8 +131,7 @@ namespace platform {
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
auto [max_value, idx] = torch::max(X_train.index({ i, "..." }), 0);
|
||||
states[features[i]] = std::vector<int>(max_value.item<int>() + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
|
||||
}
|
||||
auto [max_value, idx] = torch::max(y_train, 0);
|
||||
states[className] = std::vector<int>(max_value.item<int>() + 1);
|
||||
|
@@ -11,4 +11,28 @@ static platform::RegistrarDiscretization registrarBU4("bin4u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ4("bin4q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::QUANTILE);});
|
||||
static platform::RegistrarDiscretization registrarBU5("bin5u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ5("bin5q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::QUANTILE);});
|
||||
static platform::RegistrarDiscretization registrarBU6("bin6u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ6("bin6q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::QUANTILE);});
|
||||
static platform::RegistrarDiscretization registrarBU7("bin7u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ7("bin7q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::QUANTILE);});
|
||||
static platform::RegistrarDiscretization registrarBU8("bin8u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ8("bin8q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::QUANTILE);});
|
||||
static platform::RegistrarDiscretization registrarBU9("bin9u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ9("bin9q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::QUANTILE);});
|
||||
static platform::RegistrarDiscretization registrarBU10("bin10u",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::UNIFORM);});
|
||||
static platform::RegistrarDiscretization registrarBQ10("bin10q",
|
||||
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::QUANTILE);});
|
||||
#endif
|
@@ -29,7 +29,7 @@ namespace platform {
|
||||
{"framework", {"bulma", "bootstrap"}},
|
||||
{"margin", {"0.1", "0.2", "0.3"}},
|
||||
{"n_folds", {"5", "10"}},
|
||||
{"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q"}},
|
||||
{"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q", "bin5q", "bin5u", "bin6q", "bin6u", "bin7q", "bin7u", "bin8q", "bin8u", "bin9q", "bin9u", "bin10q", "bin10u"}},
|
||||
{"smooth_strat", {"ORIGINAL", "LAPLACE", "CESTNIK"}},
|
||||
{"platform", {"any"}},
|
||||
{"model", {"any"}},
|
||||
|
@@ -10,6 +10,7 @@ namespace platform {
|
||||
static std::string hiddenResults() { return "hidden_results/"; }
|
||||
static std::string excel() { return "excel/"; }
|
||||
static std::string grid() { return "grid/"; }
|
||||
static std::string graphs() { return "graphs/"; }
|
||||
static std::string datasets()
|
||||
{
|
||||
auto env = platform::DotEnv();
|
||||
|
@@ -24,7 +24,24 @@ namespace platform {
|
||||
{
|
||||
std::cout << result.getJson().dump(4) << std::endl;
|
||||
}
|
||||
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files)
|
||||
void Experiment::saveGraph()
|
||||
{
|
||||
std::cout << "Saving graphs..." << std::endl;
|
||||
auto data = result.getJson();
|
||||
for (const auto& item : data["results"]) {
|
||||
auto graphs = item["graph"];
|
||||
int i = 0;
|
||||
for (const auto& graph : graphs) {
|
||||
i++;
|
||||
auto fileName = Paths::graphs() + result.getFilename() + "_graph_" + item["dataset"].get<std::string>() + "_" + std::to_string(i) + ".dot";
|
||||
auto file = std::ofstream(fileName);
|
||||
file << graph.get<std::string>();
|
||||
file.close();
|
||||
std::cout << "Graph saved in " << fileName << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
|
||||
{
|
||||
for (auto fileName : filesToProcess) {
|
||||
if (fileName.size() > max_name)
|
||||
@@ -48,7 +65,7 @@ namespace platform {
|
||||
for (auto fileName : filesToProcess) {
|
||||
if (!quiet)
|
||||
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
|
||||
cross_validation(fileName, quiet, no_train_score, generate_fold_files);
|
||||
cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph);
|
||||
if (!quiet)
|
||||
std::cout << std::endl;
|
||||
}
|
||||
@@ -71,7 +88,7 @@ namespace platform {
|
||||
|
||||
void showProgress(int fold, const std::string& color, const std::string& phase)
|
||||
{
|
||||
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
|
||||
std::string prefix = phase == "-" ? "" : "\b\b\b\b";
|
||||
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
|
||||
|
||||
}
|
||||
@@ -113,7 +130,7 @@ namespace platform {
|
||||
file << output.dump(4);
|
||||
file.close();
|
||||
}
|
||||
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files)
|
||||
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
|
||||
{
|
||||
//
|
||||
// Load dataset and prepare data
|
||||
@@ -151,6 +168,7 @@ namespace platform {
|
||||
json confusion_matrices = json::array();
|
||||
json confusion_matrices_train = json::array();
|
||||
std::vector<std::string> notes;
|
||||
std::vector<std::string> graphs;
|
||||
Timer train_timer, test_timer;
|
||||
int item = 0;
|
||||
bool first_seed = true;
|
||||
@@ -176,6 +194,8 @@ namespace platform {
|
||||
//
|
||||
for (int nfold = 0; nfold < nfolds; nfold++) {
|
||||
auto clf = Models::instance()->create(result.getModel());
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "-");
|
||||
setModelVersion(clf->getVersion());
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, fileName);
|
||||
@@ -237,6 +257,13 @@ namespace platform {
|
||||
partial_result.addTimeTrain(train_time[item].item<double>());
|
||||
partial_result.addTimeTest(test_time[item].item<double>());
|
||||
item++;
|
||||
if (graph) {
|
||||
std::string result = "";
|
||||
for (const auto& line : clf->graph()) {
|
||||
result += line + "\n";
|
||||
}
|
||||
graphs.push_back(result);
|
||||
}
|
||||
}
|
||||
if (!quiet)
|
||||
std::cout << "end. " << flush;
|
||||
@@ -245,6 +272,7 @@ namespace platform {
|
||||
//
|
||||
// Store result totals in Result
|
||||
//
|
||||
partial_result.setGraph(graphs);
|
||||
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
||||
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
||||
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||
|
@@ -48,10 +48,11 @@ namespace platform {
|
||||
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; }
|
||||
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
|
||||
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
|
||||
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files);
|
||||
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files);
|
||||
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
|
||||
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
|
||||
void saveResult();
|
||||
void show();
|
||||
void saveGraph();
|
||||
void report(bool classification_report = false);
|
||||
private:
|
||||
Result result;
|
||||
|
@@ -15,6 +15,7 @@ namespace platform {
|
||||
data["times_train"] = json::array();
|
||||
data["times_test"] = json::array();
|
||||
data["notes"] = json::array();
|
||||
data["graph"] = json::array();
|
||||
data["train_time"] = 0.0;
|
||||
data["train_time_std"] = 0.0;
|
||||
data["test_time"] = 0.0;
|
||||
@@ -27,6 +28,12 @@ namespace platform {
|
||||
data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end());
|
||||
return *this;
|
||||
}
|
||||
/// Append the given graph strings to the "graph" array of this partial result.
/// Returns *this so calls can be chained with the other setters.
PartialResult& setGraph(const std::vector<std::string>& graph)
{
    const json entries(graph);
    auto& target = data["graph"];
    target.insert(target.end(), entries.begin(), entries.end());
    return *this;
}
|
||||
PartialResult& setConfusionMatrices(const json& confusion_matrices) { data["confusion_matrices"] = confusion_matrices; return *this; }
|
||||
PartialResult& setConfusionMatricesTrain(const json& confusion_matrices) { data["confusion_matrices_train"] = confusion_matrices; return *this; }
|
||||
PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; }
|
||||
|
@@ -28,6 +28,7 @@ namespace platform {
|
||||
std::string getModel() const { return data["model"].get<std::string>(); };
|
||||
std::string getPlatform() const { return data["platform"].get<std::string>(); };
|
||||
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
|
||||
|
||||
bool isComplete() const { return complete; };
|
||||
json getData() const { return data; }
|
||||
// Setters
|
||||
|
@@ -3,7 +3,7 @@ if(ENABLE_TESTING)
|
||||
include_directories(
|
||||
${Platform_SOURCE_DIR}/src
|
||||
${Platform_SOURCE_DIR}/lib/argparse/include
|
||||
${Platform_SOURCE_DIR}/lib/mdlp
|
||||
${Platform_SOURCE_DIR}/lib/mdlp/src
|
||||
${Platform_SOURCE_DIR}/lib/Files
|
||||
${Platform_SOURCE_DIR}/lib/json/include
|
||||
${Platform_SOURCE_DIR}/lib/folding
|
||||
|
@@ -30,7 +30,7 @@ TEST_CASE("Test BayesNet version", "[BayesNet]")
|
||||
TEST_CASE("Test mdlp version", "[mdlp]")
|
||||
{
|
||||
std::string version = mdlp::CPPFImdlp::version();
|
||||
REQUIRE(version == "1.2.1");
|
||||
REQUIRE(version == "2.0.0");
|
||||
}
|
||||
TEST_CASE("Test Arff version", "[Arff]")
|
||||
{
|
||||
|
Reference in New Issue
Block a user