Mirror of https://github.com/Doctorado-ML/FImdlp.git, synced 2025-08-17 16:35:52 +00:00

Commit: Update samples
CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.20)
project(main)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 11)

add_executable(sample sample.cpp ../src/cppmdlp/tests/ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp)
sample.cpp
@@ -1,28 +1,94 @@
#include "../src/cppmdlp/tests/ArffFiles.h"
#include <iostream>
#include <vector>
#include <iomanip>
#include <chrono>
#include <algorithm>
#include <cstring>
#include <getopt.h>
#include "../src/cppmdlp/CPPFImdlp.h"
#include "../src/cppmdlp/tests/ArffFiles.h"

using namespace std;
using namespace mdlp;

int main(int argc, char** argv)
const string PATH = "../../src/cppmdlp/tests/datasets/";

/* print a description of all supported options */
void usage(const char* path)
{
    ArffFiles file;
    vector<string> lines;
    string path = "../../src/cppmdlp/tests/datasets/";
    map<string, bool > datasets = {
        {"mfeat-factors", true},
        {"iris", true},
        {"letter", true},
        {"kdd_JapaneseVowels", false}
    };
    if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
        cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
        return 1;
    /* take only the last portion of the path */
    const char* basename = strrchr(path, '/');
    basename = basename ? basename + 1 : path;

    cout << "usage: " << basename << "[OPTION]" << endl;
    cout << " -h, --help\t\t Print this help and exit." << endl;
    cout << " -f, --file[=FILENAME]\t {all, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}." << endl;
    cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
    cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
    cout << " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 = any" << endl;
    cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
}

    file.load(path + argv[1] + ".arff", datasets[argv[1]]);
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
{
    string file_name;
    string path = PATH;
    int max_depth = numeric_limits<int>::max();
    int min_length = 3;
    float max_cutpoints = 0;
    static struct option long_options[] = {
        { "help", no_argument, 0, 'h' },
        { "file", required_argument, 0, 'f' },
        { "path", required_argument, 0, 'p' },
        { "max_depth", required_argument, 0, 'm' },
        { "max_cutpoints", required_argument, 0, 'c' },
        { "min_length", required_argument, 0, 'n' },
        { 0, 0, 0, 0 }
    };
    while (1) {
        auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options, 0);
        if (c == -1)
            break;
        switch (c) {
            case 'h':
                usage(argv[0]);
                exit(0);
            case 'f':
                file_name = optarg;
                break;
            case 'm':
                max_depth = atoi(optarg);
                break;
            case 'n':
                min_length = atoi(optarg);
                break;
            case 'c':
                max_cutpoints = atof(optarg);
                break;
            case 'p':
                path = optarg;
                if (path.back() != '/')
                    path += '/';
                break;
            case '?':
                usage(argv[0]);
                exit(1);
            default:
                abort();
        }
    }
    if (file_name.empty()) {
        usage(argv[0]);
        exit(1);
    }
    return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
}

void process_file(string path, string file_name, bool class_last, int max_depth, int min_length, float max_cutpoints)
{
    ArffFiles file;

    file.load(path + file_name + ".arff", class_last);
    auto attributes = file.getAttributes();
    int items = file.getSize();
    cout << "Number of lines: " << items << endl;
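For orientation: the hunk above replaces the old positional-argument main() with a getopt_long based parse_arguments(). The following is a minimal, self-contained sketch of that same parsing pattern, reduced to two options; the option letters and the use of optarg follow the diff, everything else (defaults, messages) is illustrative only, not the sample's actual code.

#include <cstdlib>
#include <getopt.h>
#include <iostream>
#include <string>

// Sketch of the getopt_long pattern used by parse_arguments() above:
// long option names map onto the same short letters as the optstring "hf:m:".
int main(int argc, char** argv)
{
    std::string file_name;
    int max_depth = 999999; // illustrative default; the sample uses numeric_limits<int>::max()
    static struct option long_options[] = {
        { "help", no_argument, 0, 'h' },
        { "file", required_argument, 0, 'f' },
        { "max_depth", required_argument, 0, 'm' },
        { 0, 0, 0, 0 }
    };
    int c;
    while ((c = getopt_long(argc, argv, "hf:m:", long_options, 0)) != -1) {
        switch (c) {
            case 'h': std::cout << "usage: sketch [-f FILE] [-m INT]\n"; return 0;
            case 'f': file_name = optarg; break;           // optarg points at the option's argument
            case 'm': max_depth = std::atoi(optarg); break;
            default:  return 1;                            // unknown option: getopt already printed a message
        }
    }
    std::cout << "file=" << file_name << " max_depth=" << max_depth << "\n";
    return 0;
}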
@@ -33,22 +99,85 @@ int main(int argc, char** argv)
    cout << "Class name: " << file.getClassName() << endl;
    cout << "Class type: " << file.getClassType() << endl;
    cout << "Data: " << endl;
    vector<vector<float>>& X = file.getX();
    vector<int>& y = file.getY();
    for (int i = 0; i < 50; i++) {
    vector<samples_t>& X = file.getX();
    labels_t& y = file.getY();
    for (int i = 0; i < 5; i++) {
        for (auto feature : X) {
            cout << fixed << setprecision(1) << feature[i] << " ";
        }
        cout << y[i] << endl;
    }
    mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
    mdlp::CPPFImdlp test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
    auto total = 0;
    for (auto i = 0; i < attributes.size(); i++) {
        auto min_max = minmax_element(X[i].begin(), X[i].end());
        cout << "Cut points for " << get<0>(attributes[i]) << endl;
        cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
        cout << "--------------------------" << setprecision(3) << endl;
        test.fit(X[i], y);
        for (auto item : test.getCutPoints()) {
            cout << item << endl;
        }
        total += test.getCutPoints().size();
    }
    cout << "Total cut points ...: " << total << endl;
    cout << "Total feature states: " << total + attributes.size() << endl;
}

void process_all_files(map<string, bool> datasets, string path, int max_depth, int min_length, float max_cutpoints)
{
    cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << endl << endl;
    printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
    printf("==================== ==== ==== ========\n");
    for (auto dataset : datasets) {
        ArffFiles file;
        file.load(path + dataset.first + ".arff", dataset.second);
        auto attributes = file.getAttributes();
        vector<samples_t>& X = file.getX();
        labels_t& y = file.getY();
        size_t timing = 0;
        int cut_points = 0;
        for (auto i = 0; i < attributes.size(); i++) {
            mdlp::CPPFImdlp test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
            std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
            test.fit(X[i], y);
            std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
            timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
            cut_points += test.getCutPoints().size();
        }
        printf("%-20s %4lu %4d %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
    }
}


int main(int argc, char** argv)
{
    map<string, bool> datasets = {
        {"glass", true},
        {"iris", true},
        {"kdd_JapaneseVowels", false},
        {"letter", true},
        {"liver-disorders", true},
        {"mfeat-factors", true},
        {"test", true}
    };
    string file_name, path;
    int max_depth, min_length;
    float max_cutpoints;
    tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
    if (datasets.find(file_name) == datasets.end() && file_name != "all") {
        cout << "Invalid file name: " << file_name << endl;
        usage(argv[0]);
        exit(1);
    }
    if (file_name == "all")
        process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
    else {
        process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
        cout << "File name ....: " << file_name << endl;
        cout << "Max depth ....: " << max_depth << endl;
        cout << "Min length ...: " << min_length << endl;
        cout << "Max cutpoints : " << max_cutpoints << endl;
    }
    return 0;
}
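The core of the updated sample boils down to one CPPFImdlp discretizer per feature column: fit it on that column plus the labels, then read back the cut points. A stripped-down sketch follows; the headers, types (samples_t, labels_t), constructor arguments and default values (min_length=3, max_depth=numeric_limits<int>::max(), max_cutpoints=0) are taken from the diff above, while the hard-coded iris path and the assumption that it is compiled next to sample.cpp (so the relative includes resolve, linked against the same sources listed in CMakeLists.txt) are illustrative.

#include <iostream>
#include <limits>
#include <vector>
#include "../src/cppmdlp/CPPFImdlp.h"
#include "../src/cppmdlp/tests/ArffFiles.h"

using namespace std;
using namespace mdlp;

int main()
{
    // Load one ARFF dataset; 'true' means the class attribute is last,
    // as recorded for iris in the datasets map above.
    ArffFiles file;
    file.load("../../src/cppmdlp/tests/datasets/iris.arff", true);
    auto attributes = file.getAttributes();
    vector<samples_t>& X = file.getX();
    labels_t& y = file.getY();

    // One discretizer per feature: fit on the column, then count its cut points,
    // mirroring the loops in process_file() / process_all_files().
    size_t total = 0;
    for (size_t i = 0; i < attributes.size(); i++) {
        CPPFImdlp test = CPPFImdlp(3, numeric_limits<int>::max(), 0);
        test.fit(X[i], y);
        total += test.getCutPoints().size();
    }
    cout << "Total cut points: " << total << endl;
    return 0;
}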
Python sample script
@@ -9,13 +9,19 @@ from fimdlp.mdlp import FImdlp
datasets = {
    "mfeat-factors": True,
    "iris": True,
    "glass": True,
    "liver-disorders": True,
    "letter": True,
    "kdd_JapaneseVowels": False,
}

ap = argparse.ArgumentParser()
ap.add_argument(
    "--alternative", dest="proposal", action="store_const", const=1
    "--min_length", type=int, default=3, help="Minimum length of interval"
)
ap.add_argument("--max_depth", type=int, default=9999, help="Maximum depth")
ap.add_argument(
    "--max_cuts", type=float, default=0, help="Maximum number of cut points"
)
ap.add_argument("dataset", type=str, choices=datasets.keys())
args = ap.parse_args()

@@ -30,7 +36,11 @@ class_name = df.columns.to_list()[class_column]
X = df.drop(class_name, axis=1)
y, _ = pd.factorize(df[class_name])
X = X.to_numpy()
test = FImdlp(algorithm=args.proposal if args.proposal is not None else 0)
test = FImdlp(
    min_length=args.min_length,
    max_depth=args.max_depth,
    max_cuts=args.max_cuts,
)
now = time.time()
test.fit(X, y)
fit_time = time.time()
Submodule src/cppmdlp updated: ed7433672d...770502c8e5