diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index ace3c51..6398778 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -2,5 +2,6 @@ cmake_minimum_required(VERSION 3.20) project(sample) set(CMAKE_CXX_STANDARD 11) +set(CMAKE_BUILD_TYPE Debug) add_executable(sample sample.cpp ../src/cppmdlp/tests/ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp) diff --git a/samples/sample.cpp b/samples/sample.cpp index 9440421..c27ce2d 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -28,7 +28,7 @@ void usage(const char* path) cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl; cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl; cout - << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any" + << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any" << endl; cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl; } @@ -40,7 +40,7 @@ tuple parse_arguments(int argc, char** argv) int max_depth = numeric_limits::max(); int min_length = 3; float max_cutpoints = 0; - const option long_options[] = { + const vector long_options = { {"help", no_argument, nullptr, 'h'}, {"file", required_argument, nullptr, 'f'}, {"path", required_argument, nullptr, 'p'}, @@ -50,7 +50,7 @@ tuple parse_arguments(int argc, char** argv) {nullptr, no_argument, nullptr, 0} }; while (true) { - const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, nullptr); + const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options.data(), nullptr); if (c == -1) break; switch (c) { @@ -94,8 +94,8 @@ void process_file(const string& path, const string& file_name, bool class_last, ArffFiles file; file.load(path + file_name + ".arff", class_last); - auto attributes = file.getAttributes(); - auto items = file.getSize(); + const auto attributes = file.getAttributes(); + const auto items = file.getSize(); cout << "Number of lines: " << items << endl; cout << "Attributes: " << endl; for (auto attribute : attributes) { @@ -113,17 +113,21 @@ void process_file(const string& path, const string& file_name, bool class_last, cout << y[i] << endl; } auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints); - auto total = 0; + size_t total = 0; for (auto i = 0; i < attributes.size(); i++) { auto min_max = minmax_element(X[i].begin(), X[i].end()); - cout << "Cut points for " << get<0>(attributes[i]) << endl; - cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl; - cout << "--------------------------" << setprecision(3) << endl; + cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3); test.fit(X[i], y); - for (auto item : test.getCutPoints()) { - cout << item << endl; + auto cut_points = test.getCutPoints(); + for (auto item : cut_points) { + cout << item; + if (item != cut_points.back()) + cout << ", "; } total += test.getCutPoints().size(); + cout << "]" << endl; + cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl; + cout << "--------------------------" << endl; } cout << "Total cut points ...: " << total << endl; cout << "Total feature states: " << total + attributes.size() << endl; @@ -143,7 +147,7 @@ void process_all_files(const map& datasets, const string& path, in vector& X = file.getX(); labels_t& y = file.getY(); size_t timing = 0; - int cut_points = 0; + size_t cut_points = 0; for (auto i = 0; i < attributes.size(); i++) { auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints); std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); @@ -152,7 +156,7 @@ void process_all_files(const map& datasets, const string& path, in timing += std::chrono::duration_cast(end - begin).count(); cut_points += test.getCutPoints().size(); } - printf("%-20s %4lu %4d %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing); + printf("%-20s %4lu %4zu %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing); } } diff --git a/src/cppmdlp b/src/cppmdlp index d77d274..db76afc 160000 --- a/src/cppmdlp +++ b/src/cppmdlp @@ -1 +1 @@ -Subproject commit d77d27459ba6fbddcbc54469fab718ab4337290d +Subproject commit db76afc4e2f38c4b365925b84b31c0bb5713dc84 diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py index a91a55e..e0a5990 100644 --- a/src/fimdlp/tests/FImdlp_test.py +++ b/src/fimdlp/tests/FImdlp_test.py @@ -18,6 +18,13 @@ class FImdlpTest(unittest.TestCase): f"{__version__}({CFImdlp().get_version().decode()})", ) + def test_minimum_mdlp_version(self): + mdlp_version = tuple( + int(c) for c in CFImdlp().get_version().decode().split(".")[0:3] + ) + minimum_mdlp_version = (1, 1, 2) + self.assertTrue(mdlp_version >= minimum_mdlp_version) + def test_init(self): clf = FImdlp() self.assertEqual(-1, clf.n_jobs) @@ -312,11 +319,11 @@ class FImdlpTest(unittest.TestCase): clf.fit(X, y) expected_cutpoints = [ [5.45], - [3.35], + [2.85], [2.45], [0.8], ] - expected_depths = [1] * 4 + expected_depths = [3, 5, 4, 3] self.assertListEqual(expected_depths, clf.get_depths()) for expected, computed in zip( expected_cutpoints, clf.get_cut_points()