Add Graphs to results

Add bin5..bin10 quantile (q) and uniform (u) discretizer algorithms
Fix bug in computing states (iota was filling a copy of each feature's states vector)
Update mdlp to 2.0.0
This commit is contained in:
2024-07-11 11:23:20 +02:00
parent 3acc34e4c6
commit 26dfe6d056
15 changed files with 83 additions and 17 deletions

View File

@@ -21,7 +21,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
set(CMAKE_CXX_FLAGS_DEBUG " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
# Options

View File

@@ -2,7 +2,7 @@ include_directories(
## Libs
${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/folding
${Platform_SOURCE_DIR}/lib/mdlp
${Platform_SOURCE_DIR}/lib/mdlp/src
${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/json/include
${Platform_SOURCE_DIR}/lib/libxlsxwriter/include

View File

@@ -59,6 +59,7 @@ void manageArguments(argparse::ArgumentParser& program)
smooth_arg.choices(choice);
}
program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true);
program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true);
program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
@@ -87,7 +88,7 @@ int main(int argc, char** argv)
manageArguments(program);
std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat;
json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files;
bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph;
std::vector<int> seeds;
std::vector<std::string> file_names;
std::vector<std::string> filesToTest;
@@ -103,6 +104,7 @@ int main(int argc, char** argv)
smooth_strat = program.get<std::string>("smooth-strat");
stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet");
graph = program.get<bool>("graph");
n_folds = program.get<int>("folds");
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
@@ -200,7 +202,7 @@ int main(int argc, char** argv)
}
platform::Timer timer;
timer.start();
experiment.go(filesToTest, quiet, no_train_score, generate_fold_files);
experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph);
experiment.setDuration(timer.getDuration());
if (!quiet) {
// Classification report if only one dataset is tested
@@ -209,6 +211,9 @@ int main(int argc, char** argv)
if (saveResults) {
experiment.saveResult();
}
if (graph) {
experiment.saveGraph();
}
std::cout << "Done!" << std::endl;
return 0;
}

View File

@@ -131,8 +131,7 @@ namespace platform {
for (int i = 0; i < features.size(); ++i) {
auto [max_value, idx] = torch::max(X_train.index({ i, "..." }), 0);
states[features[i]] = std::vector<int>(max_value.item<int>() + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
}
auto [max_value, idx] = torch::max(y_train, 0);
states[className] = std::vector<int>(max_value.item<int>() + 1);

View File

@@ -11,4 +11,28 @@ static platform::RegistrarDiscretization registrarBU4("bin4u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ4("bin4q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::QUANTILE);});
// Register BinDisc-based discretizers for 5 to 10 under the names "bin<N>u"
// (mdlp::strategy_t::UNIFORM) and "bin<N>q" (mdlp::strategy_t::QUANTILE).
// The first BinDisc argument always matches the number in the registered name
// (presumably the bin count -- confirm against the mdlp library).
// Each factory returns a raw pointer to a newly allocated object.
// NOTE(review): whoever invokes the factory presumably owns and deletes it -- confirm.
static platform::RegistrarDiscretization registrarBU5("bin5u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ5("bin5q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::QUANTILE);});
static platform::RegistrarDiscretization registrarBU6("bin6u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ6("bin6q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::QUANTILE);});
static platform::RegistrarDiscretization registrarBU7("bin7u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ7("bin7q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::QUANTILE);});
static platform::RegistrarDiscretization registrarBU8("bin8u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ8("bin8q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::QUANTILE);});
static platform::RegistrarDiscretization registrarBU9("bin9u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ9("bin9q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::QUANTILE);});
static platform::RegistrarDiscretization registrarBU10("bin10u",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::UNIFORM);});
static platform::RegistrarDiscretization registrarBQ10("bin10q",
[](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::QUANTILE);});
#endif

View File

@@ -29,7 +29,7 @@ namespace platform {
{"framework", {"bulma", "bootstrap"}},
{"margin", {"0.1", "0.2", "0.3"}},
{"n_folds", {"5", "10"}},
{"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q"}},
{"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q", "bin5q", "bin5u", "bin6q", "bin6u", "bin7q", "bin7u", "bin8q", "bin8u", "bin9q", "bin9u", "bin10q", "bin10u"}},
{"smooth_strat", {"ORIGINAL", "LAPLACE", "CESTNIK"}},
{"platform", {"any"}},
{"model", {"any"}},

View File

@@ -10,6 +10,7 @@ namespace platform {
static std::string hiddenResults() { return "hidden_results/"; }
static std::string excel() { return "excel/"; }
static std::string grid() { return "grid/"; }
static std::string graphs() { return "graphs/"; }
static std::string datasets()
{
auto env = platform::DotEnv();

View File

@@ -24,7 +24,24 @@ namespace platform {
{
std::cout << result.getJson().dump(4) << std::endl;
}
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files)
/**
 * Writes every graph stored in the experiment result to its own ".dot" file.
 *
 * For each entry of the "results" array in the result JSON, every string of its
 * "graph" array is written to
 *   Paths::graphs() + result.getFilename() + "_graph_" + <dataset> + "_" + <1-based index> + ".dot"
 * Entries without a "graph" key are skipped. A file that cannot be opened or
 * written is reported on std::cerr instead of being announced as saved.
 */
void Experiment::saveGraph()
{
    std::cout << "Saving graphs..." << std::endl;
    const auto data = result.getJson();
    const auto results = data.find("results");
    if (results == data.end()) {
        return; // No results recorded, nothing to write
    }
    for (const auto& item : *results) {
        // find() avoids the unchecked const operator[] lookup and the copy of the array
        const auto graphs = item.find("graph");
        if (graphs == item.end()) {
            continue;
        }
        int i = 0;
        for (const auto& graph : *graphs) {
            ++i;
            const auto fileName = Paths::graphs() + result.getFilename() + "_graph_" + item["dataset"].get<std::string>() + "_" + std::to_string(i) + ".dot";
            std::ofstream file(fileName);
            if (!file.is_open()) {
                // Typically the output folder does not exist or is not writable
                std::cerr << "Unable to open " << fileName << " for writing" << std::endl;
                continue;
            }
            file << graph.get<std::string>();
            file.close();
            if (file.fail()) {
                std::cerr << "Error writing " << fileName << std::endl;
                continue;
            }
            std::cout << "Graph saved in " << fileName << std::endl;
        }
    }
}
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
{
for (auto fileName : filesToProcess) {
if (fileName.size() > max_name)
@@ -48,7 +65,7 @@ namespace platform {
for (auto fileName : filesToProcess) {
if (!quiet)
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
cross_validation(fileName, quiet, no_train_score, generate_fold_files);
cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph);
if (!quiet)
std::cout << std::endl;
}
@@ -71,7 +88,7 @@ namespace platform {
void showProgress(int fold, const std::string& color, const std::string& phase)
{
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
std::string prefix = phase == "-" ? "" : "\b\b\b\b";
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
@@ -113,7 +130,7 @@ namespace platform {
file << output.dump(4);
file.close();
}
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files)
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
{
//
// Load dataset and prepare data
@@ -151,6 +168,7 @@ namespace platform {
json confusion_matrices = json::array();
json confusion_matrices_train = json::array();
std::vector<std::string> notes;
std::vector<std::string> graphs;
Timer train_timer, test_timer;
int item = 0;
bool first_seed = true;
@@ -176,6 +194,8 @@ namespace platform {
//
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(result.getModel());
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "-");
setModelVersion(clf->getVersion());
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, fileName);
@@ -237,6 +257,13 @@ namespace platform {
partial_result.addTimeTrain(train_time[item].item<double>());
partial_result.addTimeTest(test_time[item].item<double>());
item++;
if (graph) {
std::string result = "";
for (const auto& line : clf->graph()) {
result += line + "\n";
}
graphs.push_back(result);
}
}
if (!quiet)
std::cout << "end. " << flush;
@@ -245,6 +272,7 @@ namespace platform {
//
// Store result totals in Result
//
partial_result.setGraph(graphs);
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());

View File

@@ -48,10 +48,11 @@ namespace platform {
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files);
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files);
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
void saveResult();
void show();
void saveGraph();
void report(bool classification_report = false);
private:
Result result;

View File

@@ -15,6 +15,7 @@ namespace platform {
data["times_train"] = json::array();
data["times_test"] = json::array();
data["notes"] = json::array();
data["graph"] = json::array();
data["train_time"] = 0.0;
data["train_time_std"] = 0.0;
data["test_time"] = 0.0;
@@ -27,6 +28,12 @@ namespace platform {
data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end());
return *this;
}
/// Appends the given graph strings to the end of data["graph"].
/// Entries already stored there are kept. Returns *this so calls can be chained.
PartialResult& setGraph(const std::vector<std::string>& graph)
{
    // Convert the strings to json first so they can be range-inserted
    json incoming = graph;
    auto& stored = data["graph"];
    stored.insert(stored.end(), incoming.begin(), incoming.end());
    return *this;
}
PartialResult& setConfusionMatrices(const json& confusion_matrices) { data["confusion_matrices"] = confusion_matrices; return *this; }
PartialResult& setConfusionMatricesTrain(const json& confusion_matrices) { data["confusion_matrices_train"] = confusion_matrices; return *this; }
PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; }

View File

@@ -28,6 +28,7 @@ namespace platform {
std::string getModel() const { return data["model"].get<std::string>(); };
std::string getPlatform() const { return data["platform"].get<std::string>(); };
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
bool isComplete() const { return complete; };
json getData() const { return data; }
// Setters

View File

@@ -3,7 +3,7 @@ if(ENABLE_TESTING)
include_directories(
${Platform_SOURCE_DIR}/src
${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/mdlp
${Platform_SOURCE_DIR}/lib/mdlp/src
${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/json/include
${Platform_SOURCE_DIR}/lib/folding

View File

@@ -30,7 +30,7 @@ TEST_CASE("Test BayesNet version", "[BayesNet]")
TEST_CASE("Test mdlp version", "[mdlp]")
{
std::string version = mdlp::CPPFImdlp::version();
REQUIRE(version == "1.2.1");
REQUIRE(version == "2.0.0");
}
TEST_CASE("Test Arff version", "[Arff]")
{