mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-18 08:55:51 +00:00
Add last mdlp version and update sample.cpp
This commit is contained in:
@@ -22,10 +22,14 @@ void usage(const char* path)
|
|||||||
|
|
||||||
cout << "usage: " << basename << "[OPTION]" << endl;
|
cout << "usage: " << basename << "[OPTION]" << endl;
|
||||||
cout << " -h, --help\t\t Print this help and exit." << endl;
|
cout << " -h, --help\t\t Print this help and exit." << endl;
|
||||||
cout << " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}." << endl;
|
cout
|
||||||
|
<< " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
||||||
|
<< endl;
|
||||||
cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
|
cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
|
||||||
cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
|
cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
|
||||||
cout << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any" << endl;
|
cout
|
||||||
|
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any"
|
||||||
|
<< endl;
|
||||||
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
|
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -36,17 +40,17 @@ tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
|||||||
int max_depth = numeric_limits<int>::max();
|
int max_depth = numeric_limits<int>::max();
|
||||||
int min_length = 3;
|
int min_length = 3;
|
||||||
float max_cutpoints = 0;
|
float max_cutpoints = 0;
|
||||||
static struct option long_options[] = {
|
const option long_options[] = {
|
||||||
{ "help", no_argument, 0, 'h' },
|
{"help", no_argument, nullptr, 'h'},
|
||||||
{ "file", required_argument, 0, 'f' },
|
{"file", required_argument, nullptr, 'f'},
|
||||||
{ "path", required_argument, 0, 'p' },
|
{"path", required_argument, nullptr, 'p'},
|
||||||
{ "max_depth", required_argument, 0, 'm' },
|
{"max_depth", required_argument, nullptr, 'm'},
|
||||||
{ "max_cutpoints", required_argument, 0, 'c' },
|
{"max_cutpoints", required_argument, nullptr, 'c'},
|
||||||
{ "min_length", required_argument, 0, 'n' },
|
{"min_length", required_argument, nullptr, 'n'},
|
||||||
{ 0, 0, 0, 0 }
|
{nullptr, no_argument, nullptr, 0}
|
||||||
};
|
};
|
||||||
while (1) {
|
while (true) {
|
||||||
auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, 0);
|
const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, nullptr);
|
||||||
if (c == -1)
|
if (c == -1)
|
||||||
break;
|
break;
|
||||||
switch (c) {
|
switch (c) {
|
||||||
@@ -54,16 +58,16 @@ tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
|||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
exit(0);
|
exit(0);
|
||||||
case 'f':
|
case 'f':
|
||||||
file_name = optarg;
|
file_name = string(optarg);
|
||||||
break;
|
break;
|
||||||
case 'm':
|
case 'm':
|
||||||
max_depth = atoi(optarg);
|
max_depth = stoi(optarg);
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
min_length = atoi(optarg);
|
min_length = stoi(optarg);
|
||||||
break;
|
break;
|
||||||
case 'c':
|
case 'c':
|
||||||
max_cutpoints = atof(optarg);
|
max_cutpoints = stof(optarg);
|
||||||
break;
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
path = optarg;
|
path = optarg;
|
||||||
@@ -84,13 +88,14 @@ tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
|||||||
return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
|
return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_file(string path, string file_name, bool class_last, int max_depth, int min_length, float max_cutpoints)
|
void process_file(const string& path, const string& file_name, bool class_last, int max_depth, int min_length,
|
||||||
|
float max_cutpoints)
|
||||||
{
|
{
|
||||||
ArffFiles file;
|
ArffFiles file;
|
||||||
|
|
||||||
file.load(path + file_name + ".arff", class_last);
|
file.load(path + file_name + ".arff", class_last);
|
||||||
auto attributes = file.getAttributes();
|
auto attributes = file.getAttributes();
|
||||||
int items = file.getSize();
|
auto items = file.getSize();
|
||||||
cout << "Number of lines: " << items << endl;
|
cout << "Number of lines: " << items << endl;
|
||||||
cout << "Attributes: " << endl;
|
cout << "Attributes: " << endl;
|
||||||
for (auto attribute : attributes) {
|
for (auto attribute : attributes) {
|
||||||
@@ -107,7 +112,7 @@ void process_file(string path, string file_name, bool class_last, int max_depth,
|
|||||||
}
|
}
|
||||||
cout << y[i] << endl;
|
cout << y[i] << endl;
|
||||||
}
|
}
|
||||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||||
auto total = 0;
|
auto total = 0;
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||||
@@ -124,12 +129,14 @@ void process_file(string path, string file_name, bool class_last, int max_depth,
|
|||||||
cout << "Total feature states: " << total + attributes.size() << endl;
|
cout << "Total feature states: " << total + attributes.size() << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_all_files(map<string, bool> datasets, string path, int max_depth, int min_length, float max_cutpoints)
|
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
||||||
|
float max_cutpoints)
|
||||||
{
|
{
|
||||||
cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << endl << endl;
|
cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
||||||
|
<< max_cutpoints << endl << endl;
|
||||||
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
||||||
printf("==================== ==== ==== ========\n");
|
printf("==================== ==== ==== ========\n");
|
||||||
for (auto dataset : datasets) {
|
for (const auto& dataset : datasets) {
|
||||||
ArffFiles file;
|
ArffFiles file;
|
||||||
file.load(path + dataset.first + ".arff", dataset.second);
|
file.load(path + dataset.first + ".arff", dataset.second);
|
||||||
auto attributes = file.getAttributes();
|
auto attributes = file.getAttributes();
|
||||||
@@ -138,7 +145,7 @@ void process_all_files(map<string, bool> datasets, string path, int max_depth, i
|
|||||||
size_t timing = 0;
|
size_t timing = 0;
|
||||||
int cut_points = 0;
|
int cut_points = 0;
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||||
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
||||||
test.fit(X[i], y);
|
test.fit(X[i], y);
|
||||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||||
@@ -161,8 +168,10 @@ int main(int argc, char** argv)
|
|||||||
{"mfeat-factors", true},
|
{"mfeat-factors", true},
|
||||||
{"test", true}
|
{"test", true}
|
||||||
};
|
};
|
||||||
string file_name, path;
|
string file_name;
|
||||||
int max_depth, min_length;
|
string path;
|
||||||
|
int max_depth;
|
||||||
|
int min_length;
|
||||||
float max_cutpoints;
|
float max_cutpoints;
|
||||||
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
||||||
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
||||||
|
Submodule src/cppmdlp updated: 12222f7903...77135739cf
Reference in New Issue
Block a user