Update mdlp version

Add minimum mdlp version test
Update sample.cpp
This commit is contained in:
2023-04-25 12:05:52 +02:00
parent 878cd379ee
commit 3ed491cd34
4 changed files with 28 additions and 16 deletions

View File

@@ -2,5 +2,6 @@ cmake_minimum_required(VERSION 3.20)
project(sample)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
add_executable(sample sample.cpp ../src/cppmdlp/tests/ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp)

View File

@@ -28,7 +28,7 @@ void usage(const char* path)
cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
cout
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any"
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
<< endl;
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
}
@@ -40,7 +40,7 @@ tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
int max_depth = numeric_limits<int>::max();
int min_length = 3;
float max_cutpoints = 0;
const option long_options[] = {
const vector<struct option> long_options = {
{"help", no_argument, nullptr, 'h'},
{"file", required_argument, nullptr, 'f'},
{"path", required_argument, nullptr, 'p'},
@@ -50,7 +50,7 @@ tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
{nullptr, no_argument, nullptr, 0}
};
while (true) {
const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, nullptr);
const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options.data(), nullptr);
if (c == -1)
break;
switch (c) {
@@ -94,8 +94,8 @@ void process_file(const string& path, const string& file_name, bool class_last,
ArffFiles file;
file.load(path + file_name + ".arff", class_last);
auto attributes = file.getAttributes();
auto items = file.getSize();
const auto attributes = file.getAttributes();
const auto items = file.getSize();
cout << "Number of lines: " << items << endl;
cout << "Attributes: " << endl;
for (auto attribute : attributes) {
@@ -113,17 +113,21 @@ void process_file(const string& path, const string& file_name, bool class_last,
cout << y[i] << endl;
}
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
auto total = 0;
size_t total = 0;
for (auto i = 0; i < attributes.size(); i++) {
auto min_max = minmax_element(X[i].begin(), X[i].end());
cout << "Cut points for " << get<0>(attributes[i]) << endl;
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
cout << "--------------------------" << setprecision(3) << endl;
cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
test.fit(X[i], y);
for (auto item : test.getCutPoints()) {
cout << item << endl;
auto cut_points = test.getCutPoints();
for (auto item : cut_points) {
cout << item;
if (item != cut_points.back())
cout << ", ";
}
total += test.getCutPoints().size();
cout << "]" << endl;
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
cout << "--------------------------" << endl;
}
cout << "Total cut points ...: " << total << endl;
cout << "Total feature states: " << total + attributes.size() << endl;
@@ -143,7 +147,7 @@ void process_all_files(const map<string, bool>& datasets, const string& path, in
vector<samples_t>& X = file.getX();
labels_t& y = file.getY();
size_t timing = 0;
int cut_points = 0;
size_t cut_points = 0;
for (auto i = 0; i < attributes.size(); i++) {
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
@@ -152,7 +156,7 @@ void process_all_files(const map<string, bool>& datasets, const string& path, in
timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
cut_points += test.getCutPoints().size();
}
printf("%-20s %4lu %4d %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
printf("%-20s %4lu %4zu %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
}
}

View File

@@ -18,6 +18,13 @@ class FImdlpTest(unittest.TestCase):
f"{__version__}({CFImdlp().get_version().decode()})",
)
def test_minimum_mdlp_version(self):
    """Check that the wrapped mdlp library is at least version 1.1.2.

    The version reported by ``CFImdlp().get_version()`` is decoded to text
    and its first three dot-separated components are parsed as integers,
    so the comparison is numeric (tuple ordering) rather than lexical.
    """
    mdlp_version = tuple(
        int(c) for c in CFImdlp().get_version().decode().split(".")[0:3]
    )
    minimum_mdlp_version = (1, 1, 2)
    # assertGreaterEqual reports both tuples on failure, unlike
    # assertTrue(a >= b), which only says "False is not true".
    self.assertGreaterEqual(mdlp_version, minimum_mdlp_version)
def test_init(self):
clf = FImdlp()
self.assertEqual(-1, clf.n_jobs)
@@ -312,11 +319,11 @@ class FImdlpTest(unittest.TestCase):
clf.fit(X, y)
expected_cutpoints = [
[5.45],
[3.35],
[2.85],
[2.45],
[0.8],
]
expected_depths = [1] * 4
expected_depths = [3, 5, 4, 3]
self.assertListEqual(expected_depths, clf.get_depths())
for expected, computed in zip(
expected_cutpoints, clf.get_cut_points()