From 9966ba4af8af4b23ea4393ebd298538895cc9d8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 9 Jan 2025 10:04:16 +0100 Subject: [PATCH] Move tests library to tests/lib Add PBC4cip classifier --- .gitmodules | 7 +- CMakeLists.txt | 6 +- cmake/modules/CodeCoverage.cmake | 8 +- lib/Files/ArffFiles.cc | 168 ------------------------------- lib/Files/ArffFiles.h | 32 ------ lib/Files/CMakeLists.txt | 1 - lib/json | 2 +- pyclfs/CMakeLists.txt | 2 +- pyclfs/PBC4cip.cc | 8 ++ pyclfs/PBC4cip.h | 13 +++ tests/TestPythonClassifiers.cc | 12 ++- tests/lib/Files | 1 + {lib => tests/lib}/catch2 | 0 {lib => tests/lib}/mdlp | 0 14 files changed, 49 insertions(+), 211 deletions(-) delete mode 100644 lib/Files/ArffFiles.cc delete mode 100644 lib/Files/ArffFiles.h delete mode 100644 lib/Files/CMakeLists.txt create mode 100644 pyclfs/PBC4cip.cc create mode 100644 pyclfs/PBC4cip.h create mode 160000 tests/lib/Files rename {lib => tests/lib}/catch2 (100%) rename {lib => tests/lib}/mdlp (100%) diff --git a/.gitmodules b/.gitmodules index 65a5706..960d375 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,8 +3,11 @@ path = lib/json url = https://github.com/nlohmann/json.git [submodule "lib/catch2"] - path = lib/catch2 + path = tests/lib/catch2 url = https://github.com/catchorg/Catch2.git [submodule "lib/mdlp"] - path = lib/mdlp + path = tests/lib/mdlp url = https://github.com/rmontanana/mdlp +[submodule "tests/lib/Files"] + path = tests/lib/Files + url = https://github.com/rmontanana/ArffFiles diff --git a/CMakeLists.txt b/CMakeLists.txt index f48212b..0235bed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,9 +78,9 @@ add_subdirectory(pyclfs) # ------- if (ENABLE_TESTING) MESSAGE("Testing enabled") - add_git_submodule(lib/catch2) - add_git_submodule(lib/mdlp) - add_subdirectory(lib/Files) + add_git_submodule(tests/lib/catch2) + add_git_submodule(tests/lib/mdlp) + add_subdirectory(tests/lib/Files) include(CTest) add_subdirectory(tests) endif (ENABLE_TESTING) diff --git a/cmake/modules/CodeCoverage.cmake b/cmake/modules/CodeCoverage.cmake index d4a039f..670dea8 100644 --- a/cmake/modules/CodeCoverage.cmake +++ b/cmake/modules/CodeCoverage.cmake @@ -137,7 +137,7 @@ include(CMakeParseArguments) -option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) +option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE) # Check prereqs find_program( GCOV_PATH gcov ) @@ -160,7 +160,11 @@ foreach(LANG ${LANGUAGES}) endif() elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") - message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + if ("${LANG}" MATCHES "CUDA") + message(STATUS "Ignoring CUDA") + else() + message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + endif() endif() endforeach() diff --git a/lib/Files/ArffFiles.cc b/lib/Files/ArffFiles.cc deleted file mode 100644 index 99f29bd..0000000 --- a/lib/Files/ArffFiles.cc +++ /dev/null @@ -1,168 +0,0 @@ -#include "ArffFiles.h" -#include -#include -#include -#include - -ArffFiles::ArffFiles() = default; - -std::vector ArffFiles::getLines() const -{ - return lines; -} - -unsigned long int ArffFiles::getSize() const -{ - return lines.size(); -} - -std::vector> ArffFiles::getAttributes() const -{ - return attributes; -} - -std::string ArffFiles::getClassName() const -{ - return className; -} - -std::string ArffFiles::getClassType() const -{ - return classType; -} - -std::vector>& ArffFiles::getX() -{ - return X; -} - -std::vector& ArffFiles::getY() -{ - return y; -} - -void ArffFiles::loadCommon(std::string fileName) -{ - std::ifstream file(fileName); - if (!file.is_open()) { - throw std::invalid_argument("Unable to open file"); - } - std::string line; - std::string keyword; - std::string attribute; - std::string type; - std::string type_w; - while (getline(file, line)) { - if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { - continue; - } - if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) { - std::stringstream ss(line); - ss >> keyword >> attribute; - type = ""; - while (ss >> type_w) - type += type_w + " "; - attributes.emplace_back(trim(attribute), trim(type)); - continue; - } - if (line[0] == '@') { - continue; - } - lines.push_back(line); - } - file.close(); - if (attributes.empty()) - throw std::invalid_argument("No attributes found"); -} - -void ArffFiles::load(const std::string& fileName, bool classLast) -{ - int labelIndex; - loadCommon(fileName); - if (classLast) { - className = std::get<0>(attributes.back()); - classType = std::get<1>(attributes.back()); - attributes.pop_back(); - labelIndex = static_cast(attributes.size()); - } else { - className = std::get<0>(attributes.front()); - classType = std::get<1>(attributes.front()); - attributes.erase(attributes.begin()); - labelIndex = 0; - } - generateDataset(labelIndex); -} -void ArffFiles::load(const std::string& fileName, const std::string& name) -{ - int labelIndex; - loadCommon(fileName); - bool found = false; - for (int i = 0; i < attributes.size(); ++i) { - if (attributes[i].first == name) { - className = std::get<0>(attributes[i]); - classType = std::get<1>(attributes[i]); - attributes.erase(attributes.begin() + i); - labelIndex = i; - found = true; - break; - } - } - if (!found) { - throw std::invalid_argument("Class name not found"); - } - generateDataset(labelIndex); -} - -void ArffFiles::generateDataset(int labelIndex) -{ - X = std::vector>(attributes.size(), std::vector(lines.size())); - auto yy = std::vector(lines.size(), ""); - auto removeLines = std::vector(); // Lines with missing values - for (size_t i = 0; i < lines.size(); i++) { - std::stringstream ss(lines[i]); - std::string value; - int pos = 0; - int xIndex = 0; - while (getline(ss, value, ',')) { - if (pos++ == labelIndex) { - yy[i] = value; - } else { - if (value == "?") { - X[xIndex++][i] = -1; - removeLines.push_back(i); - } else - X[xIndex++][i] = stof(value); - } - } - } - for (auto i : removeLines) { - yy.erase(yy.begin() + i); - for (auto& x : X) { - x.erase(x.begin() + i); - } - } - y = factorize(yy); -} - -std::string ArffFiles::trim(const std::string& source) -{ - std::string s(source); - s.erase(0, s.find_first_not_of(" '\n\r\t")); - s.erase(s.find_last_not_of(" '\n\r\t") + 1); - return s; -} - -std::vector ArffFiles::factorize(const std::vector& labels_t) -{ - std::vector yy; - yy.reserve(labels_t.size()); - std::map labelMap; - int i = 0; - for (const std::string& label : labels_t) { - if (labelMap.find(label) == labelMap.end()) { - labelMap[label] = i++; - } - yy.push_back(labelMap[label]); - } - return yy; -} \ No newline at end of file diff --git a/lib/Files/ArffFiles.h b/lib/Files/ArffFiles.h deleted file mode 100644 index 25e5a8c..0000000 --- a/lib/Files/ArffFiles.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef ARFFFILES_H -#define ARFFFILES_H - -#include -#include - -class ArffFiles { -private: - std::vector lines; - std::vector> attributes; - std::string className; - std::string classType; - std::vector> X; - std::vector y; - void generateDataset(int); - void loadCommon(std::string); -public: - ArffFiles(); - void load(const std::string&, bool = true); - void load(const std::string&, const std::string&); - std::vector getLines() const; - unsigned long int getSize() const; - std::string getClassName() const; - std::string getClassType() const; - static std::string trim(const std::string&); - std::vector>& getX(); - std::vector& getY(); - std::vector> getAttributes() const; - static std::vector factorize(const std::vector& labels_t); -}; - -#endif \ No newline at end of file diff --git a/lib/Files/CMakeLists.txt b/lib/Files/CMakeLists.txt deleted file mode 100644 index fce5b8f..0000000 --- a/lib/Files/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_library(ArffFiles ArffFiles.cc) \ No newline at end of file diff --git a/lib/json b/lib/json index 378e091..48e7b4c 160000 --- a/lib/json +++ b/lib/json @@ -1 +1 @@ -Subproject commit 378e091795a70fced276cd882bd8a6a428668fe5 +Subproject commit 48e7b4c23b089c088c11e51c824d78d0f0949b40 diff --git a/pyclfs/CMakeLists.txt b/pyclfs/CMakeLists.txt index 541bb7e..3467699 100644 --- a/pyclfs/CMakeLists.txt +++ b/pyclfs/CMakeLists.txt @@ -4,5 +4,5 @@ include_directories( ${PyClassifiers_SOURCE_DIR}/lib/json/include ${Bayesnet_INCLUDE_DIRS} ) -add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc) +add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc PBC4cip.cc) target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy) \ No newline at end of file diff --git a/pyclfs/PBC4cip.cc b/pyclfs/PBC4cip.cc new file mode 100644 index 0000000..a40f1fa --- /dev/null +++ b/pyclfs/PBC4cip.cc @@ -0,0 +1,8 @@ +#include "PBC4cip.h" + +namespace pywrap { + PBC4cip::PBC4cip() : PyClassifier("core.PBC4cip", "PBC4cip", true) + { + validHyperparameters = { "random_state" }; + } +} /* namespace pywrap */ \ No newline at end of file diff --git a/pyclfs/PBC4cip.h b/pyclfs/PBC4cip.h new file mode 100644 index 0000000..fc19872 --- /dev/null +++ b/pyclfs/PBC4cip.h @@ -0,0 +1,13 @@ +#ifndef PBC4CIP_H +#define PBC4CIP_H +#include "PyClassifier.h" + +namespace pywrap { + class PBC4cip : public PyClassifier { + public: + PBC4cip(); + ~PBC4cip() = default; + }; + +} /* namespace pywrap */ +#endif /* PBC4CIP_H */ \ No newline at end of file diff --git a/tests/TestPythonClassifiers.cc b/tests/TestPythonClassifiers.cc index 8c2b22c..83c2b0f 100644 --- a/tests/TestPythonClassifiers.cc +++ b/tests/TestPythonClassifiers.cc @@ -135,4 +135,14 @@ TEST_CASE("XGBoost", "[PyClassifiers]") // // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon)); // } // std::cout << predict << std::endl; -// } \ No newline at end of file +// } +TEST_CASE("PBC4cip", "[PyClassifiers]") +{ + auto raw = RawDatasets("iris", true); + auto clf = pywrap::PBC4cip(); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + nlohmann::json hyperparameters = { }; + clf.setHyperparameters(hyperparameters); + auto score = clf.score(raw.Xt, raw.yt); + REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon)); +} \ No newline at end of file diff --git a/tests/lib/Files b/tests/lib/Files new file mode 160000 index 0000000..a4329f5 --- /dev/null +++ b/tests/lib/Files @@ -0,0 +1 @@ +Subproject commit a4329f5f9dfdb18ee3faa63bd5b665f2f253b8d2 diff --git a/lib/catch2 b/tests/lib/catch2 similarity index 100% rename from lib/catch2 rename to tests/lib/catch2 diff --git a/lib/mdlp b/tests/lib/mdlp similarity index 100% rename from lib/mdlp rename to tests/lib/mdlp