mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Refactor samples and fix Metrics tests
This commit is contained in:
117
samples/ArffFiles.cpp
Normal file
117
samples/ArffFiles.cpp
Normal file
@@ -0,0 +1,117 @@
|
||||
#include "ArffFiles.h"

#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor: every member starts out empty; nothing is read until load() is called.
ArffFiles::ArffFiles() = default;
|
||||
vector<string> ArffFiles::getLines()
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
string ArffFiles::getClassName()
|
||||
{
|
||||
return className;
|
||||
}
|
||||
string ArffFiles::getClassType()
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
vector<vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
void ArffFiles::load(string fileName, bool classLast)
|
||||
{
|
||||
ifstream file(fileName);
|
||||
string keyword, attribute, type;
|
||||
if (file.is_open()) {
|
||||
string line;
|
||||
while (getline(file, line)) {
|
||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
stringstream ss(line);
|
||||
ss >> keyword >> attribute >> type;
|
||||
attributes.push_back(make_tuple(attribute, type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw invalid_argument("No attributes found");
|
||||
if (classLast) {
|
||||
className = get<0>(attributes.back());
|
||||
classType = get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
} else {
|
||||
className = get<0>(attributes.front());
|
||||
classType = get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
}
|
||||
generateDataset(classLast);
|
||||
} else
|
||||
throw invalid_argument("Unable to open file");
|
||||
}
|
||||
void ArffFiles::generateDataset(bool classLast)
|
||||
{
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
// Returns a copy of `source` without leading and trailing blanks
// (space, newline, carriage return, tab). An all-blank input yields "".
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||
/**
 * Encodes string labels as consecutive integers in order of first appearance.
 *
 * The first distinct label gets 0, the next new one 1, and so on; repeated
 * labels reuse the code assigned the first time they were seen.
 *
 * @param labels_t  labels to encode
 * @return one integer code per input label, in the same order
 */
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    vector<int> codes;
    codes.reserve(labels_t.size());
    map<string, int> labelMap;
    for (const string& label : labels_t) {
        // A new label takes the next free code (the current map size); an
        // existing one is left untouched. Either way a single lookup suffices.
        const auto entry = labelMap.try_emplace(label, static_cast<int>(labelMap.size()));
        codes.push_back(entry.first->second);
    }
    return codes;
}
|
28
samples/ArffFiles.h
Normal file
28
samples/ArffFiles.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
// Small ARFF reader used by the samples: load() parses a file into a
// feature-major matrix X (X[feature][sample]) and an integer label vector y.
class ArffFiles {
public:
    ArffFiles();

    // Reads `fileName`; `classLast` tells whether the class is the last
    // declared attribute (true) or the first one (false).
    void load(string fileName, bool classLast = true);

    // Raw data lines kept from the file.
    vector<string> getLines();
    // Number of data lines.
    unsigned long int getSize();
    // Name and declared type of the class attribute.
    string getClassName();
    string getClassType();
    // Copy of the argument without leading/trailing blanks.
    string trim(const string& source);
    // Parsed features and integer-encoded labels.
    vector<vector<float>>& getX();
    vector<int>& getY();
    // (name, type) of every attribute except the class.
    vector<tuple<string, string>> getAttributes();
    // Maps each label to an integer code in order of first appearance.
    vector<int> factorize(const vector<string>& labels_t);

private:
    // Fills X and y from the stored data lines.
    void generateDataset(bool classLast);

    vector<string> lines;
    vector<tuple<string, string>> attributes;
    string className;
    string classType;
    vector<vector<float>> X;
    vector<int> y;
};
|
||||
#endif
|
6
samples/CMakeLists.txt
Normal file
6
samples/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# Builds the `sample` executable from the sample sources and the fimdlp sources.
cmake_minimum_required(VERSION 3.24)
project(main)

set(CMAKE_CXX_STANDARD 17)
# Without this, CMake may silently fall back to an older standard when the
# compiler does not support C++17; make the requirement a hard error instead.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(sample sample.cpp ArffFiles.cpp ../fimdlp/Metrics.cpp ../fimdlp/CPPFImdlp.cpp)
|
54
samples/sample.cpp
Normal file
54
samples/sample.cpp
Normal file
@@ -0,0 +1,54 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "../fimdlp/CPPFImdlp.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "../fimdlp/testcpp/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<vector<float>>& X = file.getX();
|
||||
vector<int>& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(false);
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
44
samples/sample.py
Normal file
44
samples/sample.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import time
|
||||
import argparse
|
||||
import os
|
||||
from scipy.io import arff
|
||||
import pandas as pd
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from fimdlp.mdlp import FImdlp
|
||||
|
||||
# Known datasets; the flag tells whether the class is the last column (True)
# or the first one (False).
datasets = {
    "mfeat-factors": True,
    "iris": True,
    "letter": True,
    "kdd_JapaneseVowels": False,
}

# Command line: a dataset name plus --proposal / --original to pick the
# value passed to FImdlp(proposal=...).
ap = argparse.ArgumentParser()
ap.add_argument("--proposal", action="store_true")
ap.add_argument("--original", dest="proposal", action="store_false")
ap.add_argument("dataset", type=str, choices=datasets.keys())
args = ap.parse_args()

# The datasets live under fimdlp/testcpp/datasets; look one level up when
# there is no "fimdlp" directory in the current working directory.
if os.path.isdir("fimdlp"):
    relative = ""
else:
    relative = ".."
file_name = os.path.join(
    relative, "fimdlp", "testcpp", "datasets", args.dataset
)
records, _ = arff.loadarff(file_name + ".arff")
df = pd.DataFrame(records)

# Split the frame into the feature matrix and integer-encoded labels.
if datasets[args.dataset]:
    class_column = -1
else:
    class_column = 0
class_name = df.columns[class_column]
y, _ = pd.factorize(df[class_name])
X = df.drop(class_name, axis=1).to_numpy()

# Time the discretizer's fit and transform steps separately.
test = FImdlp(proposal=args.proposal)
start = time.time()
test.fit(X, y)
print("Fitting: ", time.time() - start)
start = time.time()
Xt = test.transform(X)
print("Transforming: ", time.time() - start)
print(test.get_cut_points())

# Score a random forest on the same discretized data it was trained on.
clf = RandomForestClassifier(random_state=0)
score = clf.fit(Xt, y).score(Xt, y)
print("Random Forest score with discretized data: ", score)
|
Reference in New Issue
Block a user