Move sources to a folder and change ArffFiles files to library

This commit is contained in:
2024-07-04 13:49:15 +02:00
parent 32b956a361
commit 44accc3db1
24 changed files with 69 additions and 224 deletions

View File

@@ -1,138 +0,0 @@
// ****************************************************************
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX - FileType: SOURCE
// SPDX - License - Identifier: MIT
// ****************************************************************
#include "ArffFiles.h"
#include <fstream>
#include <map>
#include <sstream>
#include <stdexcept>
using namespace std;
// Default constructor: compiler-generated, performs no custom initialization.
ArffFiles::ArffFiles() = default;
// Returns a copy of the raw data lines stored by load().
vector<string> ArffFiles::getLines() const
{
    vector<string> copyOfLines(lines);
    return copyOfLines;
}
// Returns how many data lines are currently stored.
unsigned long int ArffFiles::getSize() const
{
    const auto rowCount = lines.size();
    return rowCount;
}
// Returns a copy of the (name, type) pairs held in `attributes`.
// After load() the class attribute has been removed from this list.
vector<pair<string, string>> ArffFiles::getAttributes() const
{
    vector<pair<string, string>> copyOfAttributes(attributes);
    return copyOfAttributes;
}
// Returns the name of the class attribute selected by load().
string ArffFiles::getClassName() const
{
    return this->className;
}
// Returns the declared type text of the class attribute selected by load().
string ArffFiles::getClassType() const
{
    return this->classType;
}
// Returns a mutable reference to the feature matrix built by generateDataset().
// The matrix is indexed as X[feature][sample].
vector<mdlp::samples_t>& ArffFiles::getX()
{
    return this->X;
}
// Returns a mutable reference to the integer-encoded class labels (one per sample).
vector<int>& ArffFiles::getY()
{
    return this->y;
}
void ArffFiles::load(const string& fileName, bool classLast)
{
ifstream file(fileName);
if (!file.is_open()) {
throw invalid_argument("Unable to open file");
}
string line;
string keyword;
string attribute;
string type;
string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
type += type_w + " ";
attributes.emplace_back(trim(attribute), trim(type));
continue;
}
if (line[0] == '@') {
continue;
}
lines.push_back(line);
}
file.close();
if (attributes.empty())
throw invalid_argument("No attributes found");
if (classLast) {
className = get<0>(attributes.back());
classType = get<1>(attributes.back());
attributes.pop_back();
} else {
className = get<0>(attributes.front());
classType = get<1>(attributes.front());
attributes.erase(attributes.begin());
}
generateDataset(classLast);
}
void ArffFiles::generateDataset(bool classLast)
{
X = vector<mdlp::samples_t>(attributes.size(), mdlp::samples_t(lines.size()));
auto yy = vector<string>(lines.size(), "");
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]);
string value;
int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) {
if (pos++ == labelIndex) {
yy[i] = value;
} else {
X[xIndex++][i] = stof(value);
}
}
}
y = factorize(yy);
}
// Returns `source` without leading and trailing spaces, single quotes,
// newlines, carriage returns and tabs. A string made only of those
// characters yields an empty string.
string ArffFiles::trim(const string& source)
{
    const char* const strippable = " '\n\r\t";
    const auto first = source.find_first_not_of(strippable);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(strippable);
    return source.substr(first, last - first + 1);
}
// Encodes textual labels as integers.
//
// Each distinct label receives the next unused code, starting at 0, in order
// of first appearance. The result has one code per input label, in the same
// order as `labels_t`.
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    vector<int> codes;
    codes.reserve(labels_t.size());
    map<string, int> labelMap;
    int nextCode = 0;
    for (const string& label : labels_t) {
        // One lookup: emplace inserts only when the label is new and always
        // hands back an iterator to the entry.
        const auto result = labelMap.emplace(label, nextCode);
        if (result.second) {
            ++nextCode;
        }
        codes.push_back(result.first->second);
    }
    return codes;
}

View File

@@ -1,41 +0,0 @@
// ****************************************************************
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX - FileType: SOURCE
// SPDX - License - Identifier: MIT
// ****************************************************************
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
#include "../typesFImdlp.h"
using namespace std;
// Reader for ARFF files: keeps the raw data lines, the attribute list, the
// class attribute description and the parsed feature matrix / label vector.
class ArffFiles {
public:
    ArffFiles();
    // Reads `fileName`; the class attribute is the last one unless
    // `classLast` is false, in which case it is the first one.
    void load(const string& fileName, bool classLast = true);
    // Copy of the raw data lines.
    vector<string> getLines() const;
    // Number of stored data lines.
    unsigned long int getSize() const;
    // Name and declared type of the class attribute.
    string getClassName() const;
    string getClassType() const;
    // Strips leading/trailing whitespace and single quotes from `source`.
    static string trim(const string& source);
    // Mutable access to the feature matrix and the encoded labels.
    vector<mdlp::samples_t>& getX();
    vector<int>& getY();
    // Copy of the (name, type) attribute pairs.
    vector<pair<string, string>> getAttributes() const;
    // Maps each distinct label to an integer code.
    static vector<int> factorize(const vector<string>& labels_t);
private:
    vector<string> lines;                     // raw data rows
    vector<pair<string, string>> attributes;  // (name, type) pairs
    string className;
    string classType;
    vector<mdlp::samples_t> X;                // feature matrix
    vector<int> y;                            // encoded labels
    // Builds X and y from `lines`; the flag tells where the label column is.
    void generateDataset(bool classLast);
};
#endif

View File

@@ -8,8 +8,8 @@
#include <string>
#include <iostream>
#include "gtest/gtest.h"
#include "ArffFiles.h"
#include "../BinDisc.h"
#include <ArffFiles.hpp>
#include "BinDisc.h"
#include "Experiments.hpp"
namespace mdlp {

View File

@@ -1,6 +1,3 @@
cmake_minimum_required(VERSION 3.20)
set(CMAKE_CXX_STANDARD 17)
cmake_policy(SET CMP0135 NEW)
include(FetchContent)
include_directories(${GTEST_INCLUDE_DIRS})
FetchContent_Declare(
@@ -11,28 +8,30 @@ FetchContent_Declare(
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
find_package(Torch REQUIRED)
include_directories(
${TORCH_INCLUDE_DIRS}
${mdlp_SOURCE_DIR}/src
${mdlp_SOURCE_DIR}/tests/lib/Files
)
enable_testing()
include_directories(${TORCH_INCLUDE_DIRS})
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
add_executable(Metrics_unittest ${mdlp_SOURCE_DIR}/src/Metrics.cpp Metrics_unittest.cpp)
target_link_libraries(Metrics_unittest GTest::gtest_main)
target_compile_options(Metrics_unittest PRIVATE --coverage)
target_link_options(Metrics_unittest PRIVATE --coverage)
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp ../Discretizer.cpp)
add_executable(FImdlp_unittest FImdlp_unittest.cpp
${mdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${mdlp_SOURCE_DIR}/src/Metrics.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp)
target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
target_compile_options(FImdlp_unittest PRIVATE --coverage)
target_link_options(FImdlp_unittest PRIVATE --coverage)
add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp ../Discretizer.cpp)
add_executable(BinDisc_unittest BinDisc_unittest.cpp ${mdlp_SOURCE_DIR}/src/BinDisc.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp)
target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
target_compile_options(BinDisc_unittest PRIVATE --coverage)
target_link_options(BinDisc_unittest PRIVATE --coverage)
add_executable(Discretizer_unittest ../BinDisc.cpp ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp ../Discretizer.cpp Discretizer_unittest.cpp)
add_executable(Discretizer_unittest Discretizer_unittest.cpp
${mdlp_SOURCE_DIR}/src/BinDisc.cpp ${mdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${mdlp_SOURCE_DIR}/src/Metrics.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp )
target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
target_compile_options(Discretizer_unittest PRIVATE --coverage)
target_link_options(Discretizer_unittest PRIVATE --coverage)

View File

@@ -7,11 +7,11 @@
#include <fstream>
#include <string>
#include <iostream>
#include <ArffFiles.hpp>
#include "gtest/gtest.h"
#include "ArffFiles.h"
#include "../Discretizer.h"
#include "../BinDisc.h"
#include "../CPPFImdlp.h"
#include "Discretizer.h"
#include "BinDisc.h"
#include "CPPFImdlp.h"
namespace mdlp {
const float margin = 1e-4;

View File

@@ -12,7 +12,7 @@
#include<fstream>
#include<vector>
#include<tuple>
#include "../typesFImdlp.h"
#include "typesFImdlp.h"
template <typename T>
void show_vector(const std::vector<T>& data, std::string title)

View File

@@ -4,12 +4,12 @@
// SPDX - License - Identifier: MIT
// ****************************************************************
#include "gtest/gtest.h"
#include "../Metrics.h"
#include "../CPPFImdlp.h"
#include <fstream>
#include <iostream>
#include "ArffFiles.h"
#include <ArffFiles.hpp>
#include "gtest/gtest.h"
#include "Metrics.h"
#include "CPPFImdlp.h"
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
try { \

View File

@@ -5,7 +5,7 @@
// ****************************************************************
#include "gtest/gtest.h"
#include "../Metrics.h"
#include "Metrics.h"
namespace mdlp {
class TestMetrics : public Metrics, public testing::Test {

1
tests/lib/Files Submodule

Submodule tests/lib/Files added at a5316928d4

View File

@@ -201,7 +201,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.1.undefined"
}
},
"nbformat": 4,