diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index bd951fb..0000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "tests/lib/Catch2"] - path = tests/lib/Catch2 - url = https://github.com/catchorg/Catch2.git -[submodule "tests/lib/mdlp"] - path = tests/lib/mdlp - url = https://github.com/rmontanana/mdlp diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 0000000..95f8764 --- /dev/null +++ b/conanfile.py @@ -0,0 +1,43 @@ +import os +import re + +from conan import ConanFile +from conan.tools.files import copy, load + + +class FoldingConan(ConanFile): + """Conan recipe that packages the header-only folding library.""" + + name = "folding" + version = "X.X.X" # placeholder, replaced by set_version() + description = "K-Fold and stratified K-Fold header-only library" + url = "https://github.com/rmontanana/folding" + license = "MIT" + homepage = "https://github.com/rmontanana/folding" + topics = ("kfold", "stratified folding") + no_copy_source = True + exports_sources = "folding.hpp" + package_type = "header-library" + + def set_version(self): + """Set the version from the FOLDING_VERSION constant in folding.hpp.""" + # Resolve the header next to the recipe, not in the current working + # directory, so the lookup does not depend on where conan is invoked. + header = os.path.join(self.recipe_folder, "folding.hpp") + match = re.search( + r'const std::string FOLDING_VERSION = "([^"]+)";', + load(self, header), + ) + if match: + self.version = match.group(1) + + def package(self): + """Copy the exported headers into the package's include directory.""" + # cpp_info.includedirs defaults to ["include"], so headers go there. + include_dir = os.path.join(self.package_folder, "include") + copy(self, "*.hpp", self.source_folder, include_dir) + + def package_info(self): + """Clear bindirs and libdirs; a header-library has neither.""" + self.cpp_info.bindirs = [] + self.cpp_info.libdirs = [] diff --git a/tests/lib/Catch2 b/tests/lib/Catch2 deleted file mode 160000 index 8039e3e..0000000 --- a/tests/lib/Catch2 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8039e3ea1e1db508c590a82048d502f30f28688b diff --git a/tests/lib/Files/ArffFiles.cc b/tests/lib/Files/ArffFiles.cc deleted file mode 100644 index d333d1e..0000000 --- a/tests/lib/Files/ArffFiles.cc +++ /dev/null @@ -1,174 +0,0 @@ -// *************************************************************** -// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX-FileType: SOURCE -// SPDX-License-Identifier: MIT -// 
*************************************************************** - -#include "ArffFiles.h" -#include -#include -#include -#include - -ArffFiles::ArffFiles() = default; - -std::vector ArffFiles::getLines() const -{ - return lines; -} - -unsigned long int ArffFiles::getSize() const -{ - return lines.size(); -} - -std::vector> ArffFiles::getAttributes() const -{ - return attributes; -} - -std::string ArffFiles::getClassName() const -{ - return className; -} - -std::string ArffFiles::getClassType() const -{ - return classType; -} - -std::vector>& ArffFiles::getX() -{ - return X; -} - -std::vector& ArffFiles::getY() -{ - return y; -} - -void ArffFiles::loadCommon(std::string fileName) -{ - std::ifstream file(fileName); - if (!file.is_open()) { - throw std::invalid_argument("Unable to open file"); - } - std::string line; - std::string keyword; - std::string attribute; - std::string type; - std::string type_w; - while (getline(file, line)) { - if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { - continue; - } - if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) { - std::stringstream ss(line); - ss >> keyword >> attribute; - type = ""; - while (ss >> type_w) - type += type_w + " "; - attributes.emplace_back(trim(attribute), trim(type)); - continue; - } - if (line[0] == '@') { - continue; - } - lines.push_back(line); - } - file.close(); - if (attributes.empty()) - throw std::invalid_argument("No attributes found"); -} - -void ArffFiles::load(const std::string& fileName, bool classLast) -{ - int labelIndex; - loadCommon(fileName); - if (classLast) { - className = std::get<0>(attributes.back()); - classType = std::get<1>(attributes.back()); - attributes.pop_back(); - labelIndex = static_cast(attributes.size()); - } else { - className = std::get<0>(attributes.front()); - classType = std::get<1>(attributes.front()); - attributes.erase(attributes.begin()); - labelIndex = 0; - } - generateDataset(labelIndex); -} 
-void ArffFiles::load(const std::string& fileName, const std::string& name) -{ - int labelIndex; - loadCommon(fileName); - bool found = false; - for (int i = 0; i < attributes.size(); ++i) { - if (attributes[i].first == name) { - className = std::get<0>(attributes[i]); - classType = std::get<1>(attributes[i]); - attributes.erase(attributes.begin() + i); - labelIndex = i; - found = true; - break; - } - } - if (!found) { - throw std::invalid_argument("Class name not found"); - } - generateDataset(labelIndex); -} - -void ArffFiles::generateDataset(int labelIndex) -{ - X = std::vector>(attributes.size(), std::vector(lines.size())); - auto yy = std::vector(lines.size(), ""); - auto removeLines = std::vector(); // Lines with missing values - for (size_t i = 0; i < lines.size(); i++) { - std::stringstream ss(lines[i]); - std::string value; - int pos = 0; - int xIndex = 0; - while (getline(ss, value, ',')) { - if (pos++ == labelIndex) { - yy[i] = value; - } else { - if (value == "?") { - X[xIndex++][i] = -1; - removeLines.push_back(i); - } else - X[xIndex++][i] = stof(value); - } - } - } - for (auto i : removeLines) { - yy.erase(yy.begin() + i); - for (auto& x : X) { - x.erase(x.begin() + i); - } - } - y = factorize(yy); -} - -std::string ArffFiles::trim(const std::string& source) -{ - std::string s(source); - s.erase(0, s.find_first_not_of(" '\n\r\t")); - s.erase(s.find_last_not_of(" '\n\r\t") + 1); - return s; -} - -std::vector ArffFiles::factorize(const std::vector& labels_t) -{ - std::vector yy; - yy.reserve(labels_t.size()); - std::map labelMap; - int i = 0; - for (const std::string& label : labels_t) { - if (labelMap.find(label) == labelMap.end()) { - labelMap[label] = i++; - } - yy.push_back(labelMap[label]); - } - return yy; -} \ No newline at end of file diff --git a/tests/lib/Files/ArffFiles.h b/tests/lib/Files/ArffFiles.h deleted file mode 100644 index c53534c..0000000 --- a/tests/lib/Files/ArffFiles.h +++ /dev/null @@ -1,38 +0,0 @@ -// 
*************************************************************** -// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX-FileType: SOURCE -// SPDX-License-Identifier: MIT -// *************************************************************** - -#ifndef ARFFFILES_H -#define ARFFFILES_H - -#include -#include - -class ArffFiles { -public: - ArffFiles(); - void load(const std::string&, bool = true); - void load(const std::string&, const std::string&); - std::vector getLines() const; - unsigned long int getSize() const; - std::string getClassName() const; - std::string getClassType() const; - static std::string trim(const std::string&); - std::vector>& getX(); - std::vector& getY(); - std::vector> getAttributes() const; - static std::vector factorize(const std::vector& labels_t); -private: - std::vector lines; - std::vector> attributes; - std::string className; - std::string classType; - std::vector> X; - std::vector y; - void generateDataset(int); - void loadCommon(std::string); -}; - -#endif \ No newline at end of file diff --git a/tests/lib/Files/CMakeLists.txt b/tests/lib/Files/CMakeLists.txt deleted file mode 100644 index fce5b8f..0000000 --- a/tests/lib/Files/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_library(ArffFiles ArffFiles.cc) \ No newline at end of file diff --git a/tests/lib/mdlp b/tests/lib/mdlp deleted file mode 160000 index 236d1b2..0000000 --- a/tests/lib/mdlp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 236d1b2f8be185039493fe7fce04a83e02ed72e5