Fix mistake in class type of ArffFiles

Add some type casting to CPPFImdlp
Add additional path to datasets in tests
Fix some smells in sample
Join CMakeLists
This commit is contained in:
2023-03-18 18:40:10 +01:00
parent 1f4abade2c
commit f0845c5bd1
12 changed files with 87 additions and 113 deletions

View File

@@ -2,13 +2,10 @@
#include <fstream>
#include <sstream>
#include <map>
#include <iostream>
using namespace std;
ArffFiles::ArffFiles()
{
}
ArffFiles::ArffFiles() = default;
vector<string> ArffFiles::getLines()
{
return lines;
@@ -37,19 +34,22 @@ vector<int>& ArffFiles::getY()
{
return y;
}
void ArffFiles::load(string fileName, bool classLast)
void ArffFiles::load(const string fileName, bool classLast)
{
ifstream file(fileName);
if (file.is_open()) {
string line, keyword, attribute, type;
string line, keyword, attribute, type, type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
ss >> keyword >> attribute >> type;
attributes.push_back({ attribute, type });
ss >> keyword >> attribute;
type = "";
while(ss >> type_w)
type += type_w + " ";
attributes.emplace_back(attribute, type );
continue;
}
if (line[0] == '@') {
@@ -77,7 +77,7 @@ void ArffFiles::generateDataset(bool classLast)
{
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
vector<string> yy = vector<string>(lines.size(), "");
int labelIndex = classLast ? attributes.size() : 0;
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]);
string value;
@@ -92,7 +92,7 @@ void ArffFiles::generateDataset(bool classLast)
}
y = factorize(yy);
}
string ArffFiles::trim(const string& source)
string ArffFiles::trim(const string& source)
{
string s(source);
s.erase(0, s.find_first_not_of(" \n\r\t"));
@@ -105,7 +105,7 @@ vector<int> ArffFiles::factorize(const vector<string>& labels_t)
yy.reserve(labels_t.size());
map<string, int> labelMap;
int i = 0;
for (string label : labels_t) {
for (const string &label : labels_t) {
if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++;
}

View File

@@ -18,10 +18,10 @@ public:
unsigned long int getSize();
string getClassName();
string getClassType();
string trim(const string&);
static string trim(const string&);
vector<vector<float>>& getX();
vector<int>& getY();
vector<pair<string, string>> getAttributes();
vector<int> factorize(const vector<string>& labels_t);
static vector<int> factorize(const vector<string>& labels_t);
};
#endif

View File

@@ -1,8 +1,5 @@
cmake_minimum_required(VERSION 3.14)
project(FImdlp)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 11)
include(FetchContent)
include_directories(${GTEST_INCLUDE_DIRS})
@@ -18,7 +15,7 @@ FetchContent_MakeAvailable(googletest)
enable_testing()
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
target_link_libraries(Metrics_unittest GTest::gtest_main)
target_link_libraries(FImdlp_unittest GTest::gtest_main)
target_compile_options(Metrics_unittest PRIVATE --coverage)

View File

@@ -1,8 +1,9 @@
#include "gtest/gtest.h"
#include "../Metrics.h"
#include "../CPPFImdlp.h"
#include "ArffFiles.h"
#include <fstream>
#include <iostream>
#include "ArffFiles.h"
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
try { \
stmt; \
@@ -17,11 +18,23 @@ namespace mdlp {
public:
precision_t precision = 0.000001;
TestFImdlp(): CPPFImdlp() {}
string data_path;
// Prepares the state used by the tests in this class: loads a small in-memory
// sample, calls fit() on it and resolves the directory holding the datasets.
// NOTE(review): presumably the GoogleTest fixture hook — confirm against the
// class declaration, which is not visible here.
void SetUp()
{
// 20 sample values for a single feature.
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
// 20 labels, one per sample: fifteen of class 1 followed by five of class 2.
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
fit(X, y);
// Remember which dataset directory is reachable so later loads can build
// their file names from data_path.
data_path = set_data_path();
}
// Returns the directory prefix from which the test datasets are read.
// "../datasets/" is returned when iris.arff can be opened there; otherwise
// the fallback "../../tests/datasets/" is returned without being checked.
string set_data_path()
{
    string candidate = "../datasets/";
    ifstream probe(candidate + "iris.arff");
    // Guard clause: the primary location is not reachable, use the fallback.
    if (!probe.is_open()) {
        return "../../tests/datasets/";
    }
    probe.close();
    return candidate;
}
void checkSortedVector()
{
@@ -37,6 +50,7 @@ namespace mdlp {
{
EXPECT_EQ(computed.size(), expected.size());
for (unsigned long i = 0; i < computed.size(); i++) {
cout << "(" << computed[i] << ", " << expected[i] << ") ";
EXPECT_NEAR(computed[i], expected[i], precision);
}
}
@@ -64,7 +78,7 @@ namespace mdlp {
void test_dataset(CPPFImdlp& test, string filename, vector<cutPoints_t>& expected, int depths[])
{
ArffFiles file;
file.load("../datasets/" + filename + ".arff", true);
file.load(data_path + filename + ".arff", true);
vector<samples_t>& X = file.getX();
labels_t& y = file.getY();
auto attributes = file.getAttributes();
@@ -73,10 +87,8 @@ namespace mdlp {
EXPECT_EQ(test.get_depth(), depths[feature]);
auto computed = test.getCutPoints();
cout << "Feature " << feature << ": ";
for (auto item : computed)
cout << item << " ";
cout << endl;
checkCutPoints(computed, expected[feature]);
cout << endl;
}
}
};

View File

@@ -114,7 +114,7 @@
@attribute 'Ca' real
@attribute 'Ba' real
@attribute 'Fe' real
@attribute 'Type' { 'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
@data
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'