From 1069fc8ff4f788967e7a2fd5dee88251f772009f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 21 Mar 2023 10:18:51 +0100 Subject: [PATCH] Add last mdlp version and update sample.cpp --- samples/sample.cpp | 59 ++++++++++++++++++++++++++-------------------- src/cppmdlp | 2 +- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/samples/sample.cpp b/samples/sample.cpp index 61f8321..9440421 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -22,10 +22,14 @@ void usage(const char* path) cout << "usage: " << basename << "[OPTION]" << endl; cout << " -h, --help\t\t Print this help and exit." << endl; - cout << " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}." << endl; + cout + << " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}." + << endl; cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl; cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl; - cout << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any" << endl; + cout + << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any" + << endl; cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl; } @@ -36,17 +40,17 @@ tuple parse_arguments(int argc, char** argv) int max_depth = numeric_limits::max(); int min_length = 3; float max_cutpoints = 0; - static struct option long_options[] = { - { "help", no_argument, 0, 'h' }, - { "file", required_argument, 0, 'f' }, - { "path", required_argument, 0, 'p' }, - { "max_depth", required_argument, 0, 'm' }, - { "max_cutpoints", required_argument, 0, 'c' }, - { "min_length", required_argument, 0, 'n' }, - { 0, 0, 0, 0 } + const option long_options[] = { + {"help", no_argument, nullptr, 'h'}, + {"file", required_argument, nullptr, 'f'}, + {"path", required_argument, nullptr, 'p'}, + {"max_depth", required_argument, nullptr, 'm'}, + {"max_cutpoints", required_argument, nullptr, 'c'}, + {"min_length", required_argument, nullptr, 'n'}, + {nullptr, no_argument, nullptr, 0} }; - while (1) { - auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, 0); + while (true) { + const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, nullptr); if (c == -1) break; switch (c) { @@ -54,16 +58,16 @@ tuple parse_arguments(int argc, char** argv) usage(argv[0]); exit(0); case 'f': - file_name = optarg; + file_name = string(optarg); break; case 'm': - max_depth = atoi(optarg); + max_depth = stoi(optarg); break; case 'n': - min_length = atoi(optarg); + min_length = stoi(optarg); break; case 'c': - max_cutpoints = atof(optarg); + max_cutpoints = stof(optarg); break; case 'p': path = optarg; @@ -84,13 +88,14 @@ tuple parse_arguments(int argc, char** argv) return make_tuple(file_name, path, max_depth, min_length, max_cutpoints); } -void process_file(string path, string file_name, bool class_last, int max_depth, int min_length, float max_cutpoints) +void process_file(const string& path, const string& file_name, bool class_last, int max_depth, int min_length, + float max_cutpoints) { ArffFiles file; file.load(path + file_name + ".arff", class_last); auto attributes = file.getAttributes(); - int items = file.getSize(); + auto items = file.getSize(); cout << "Number of lines: " << items << endl; cout << "Attributes: " << endl; for (auto attribute : attributes) { @@ -107,7 +112,7 @@ void process_file(string path, string file_name, bool class_last, int max_depth, } cout << y[i] << endl; } - mdlp::CPPFImdlp test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints); + auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints); auto total = 0; for (auto i = 0; i < attributes.size(); i++) { auto min_max = minmax_element(X[i].begin(), X[i].end()); @@ -124,12 +129,14 @@ void process_file(string path, string file_name, bool class_last, int max_depth, cout << "Total feature states: " << total + attributes.size() << endl; } -void process_all_files(map datasets, string path, int max_depth, int min_length, float max_cutpoints) +void process_all_files(const map& datasets, const string& path, int max_depth, int min_length, + float max_cutpoints) { - cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << endl << endl; + cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: " + << max_cutpoints << endl << endl; printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)"); printf("==================== ==== ==== ========\n"); - for (auto dataset : datasets) { + for (const auto& dataset : datasets) { ArffFiles file; file.load(path + dataset.first + ".arff", dataset.second); auto attributes = file.getAttributes(); @@ -138,7 +145,7 @@ void process_all_files(map datasets, string path, int max_depth, i size_t timing = 0; int cut_points = 0; for (auto i = 0; i < attributes.size(); i++) { - mdlp::CPPFImdlp test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints); + auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints); std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); test.fit(X[i], y); std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); @@ -161,8 +168,10 @@ int main(int argc, char** argv) {"mfeat-factors", true}, {"test", true} }; - string file_name, path; - int max_depth, min_length; + string file_name; + string path; + int max_depth; + int min_length; float max_cutpoints; tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv); if (datasets.find(file_name) == datasets.end() && file_name != "all") { diff --git a/src/cppmdlp b/src/cppmdlp index 12222f7..7713573 160000 --- a/src/cppmdlp +++ b/src/cppmdlp @@ -1 +1 @@ -Subproject commit 12222f790352189ef3a31908015ed47556e7ed28 +Subproject commit 77135739cf72cfc02603332c681baae4dcea28f3