Move tests library to tests/lib

Add PBC4cip classifier
2025-01-09 10:04:16 +01:00
20 changed files with 96 additions and 187 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,13 @@
+# Git submodules: nlohmann/json lives under lib/, test-only libraries under tests/lib/. NOTE(review): the catch2 and mdlp section names keep a lib/ prefix that differs from their paths - confirm this is intentional.
+[submodule "lib/json"]
+	path = lib/json
+	url = https://github.com/nlohmann/json.git
+[submodule "lib/catch2"]
+	path = tests/lib/catch2
+	url = https://github.com/catchorg/Catch2.git
+[submodule "lib/mdlp"]
+	path = tests/lib/mdlp
+	url = https://github.com/rmontanana/mdlp
+[submodule "tests/lib/Files"]
+	path = tests/lib/Files
+	url = https://github.com/rmontanana/ArffFiles
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,8 +45,6 @@ endif()
 find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
 message("Python3_LIBRARIES=${Python3_LIBRARIES}")

-find_package(nlohmann_json CONFIG REQUIRED)
-
 # CMakes modules
 # --------------
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -66,10 +64,9 @@ endif (ENABLE_CLANG_TIDY)

 # External libraries - dependencies of PyClassifiers
 # --------------------------------------------------
-find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
-find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ../lib/include)
-
-message(STATUS "BayesNet=${bayesnet}")
+find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
+find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${PyClassifiers_SOURCE_DIR}/../lib/include)
+message(STATUS "BayesNet=${BayesNet}")
 message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")


@@ -81,8 +78,9 @@ add_subdirectory(pyclfs)
 # -------
 if (ENABLE_TESTING)
  MESSAGE("Testing enabled")
-  find_package(Catch2 CONFIG REQUIRED)
-  find_package(arff-files CONFIG REQUIRED)
+  add_git_submodule(tests/lib/catch2)
+  add_git_submodule(tests/lib/mdlp)
+  add_subdirectory(tests/lib/Files)
  include(CTest)
  add_subdirectory(tests)
 endif (ENABLE_TESTING)
@@ -94,4 +92,4 @@ install(TARGETS PyClassifiers
        LIBRARY DESTINATION lib
        CONFIGURATIONS Release)
 install(DIRECTORY pyclfs/ DESTINATION include/pyclassifiers FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h" PATTERN "*.hpp")
-install(FILES ${Bayesnet_INCLUDE_DIRS}/bayesnet/config.h DESTINATION include/pyclassifiers CONFIGURATIONS Release)
+install(FILES ${Bayesnet_INCLUDE_DIRS}/bayesnet/config.h DESTINATION include/pyclassifiers CONFIGURATIONS Release)
--- a/4
+++ b/4
@@ -52,14 +52,14 @@ debug: ## Build a debug version of the project
 	@echo ">>> Building Debug PyClassifiers...";
 	@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
 	@mkdir $(f_debug); 
-	@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
+	@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
 	@echo ">>> Done";

 release: ## Build a Release version of the project
 	@echo ">>> Building Release PyClassifiers...";
 	@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
 	@mkdir $(f_release); 
-	@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
+	@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
 	@echo ">>> Done";	

 opt = ""
--- a/lib/json
+++ b/lib/json
--- a/pyclfs/AdaBoostPy.cc
+++ b/pyclfs/AdaBoostPy.cc
@@ -1,20 +0,0 @@
-#include "AdaBoostPy.h"
-
-namespace pywrap {
-    AdaBoostPy::AdaBoostPy() : PyClassifier("sklearn.ensemble", "AdaBoostClassifier", true)
-    {
-        validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
-    }
-    int AdaBoostPy::getNumberOfEdges() const
-    {
-        return callMethodSumOfItems("get_n_leaves");
-    }
-    int AdaBoostPy::getNumberOfStates() const
-    {
-        return callMethodSumOfItems("get_depth");
-    }
-    int AdaBoostPy::getNumberOfNodes() const
-    {
-        return callMethodSumOfItems("node_count");
-    }
-} /* namespace pywrap */
--- a/pyclfs/AdaBoostPy.h
+++ b/pyclfs/AdaBoostPy.h
@@ -1,15 +0,0 @@
-#ifndef ADABOOSTPY_H
-#define ADABOOSTPY_H
-#include "PyClassifier.h"
-
-namespace pywrap {
-    class AdaBoostPy : public PyClassifier {
-    public:
-        AdaBoostPy();
-        ~AdaBoostPy() = default;
-        int getNumberOfEdges() const override;
-        int getNumberOfStates() const override;
-        int getNumberOfNodes() const override;
-    };
-} /* namespace pywrap */
-#endif /* ADABOOST_H */
--- a/pyclfs/CMakeLists.txt
+++ b/pyclfs/CMakeLists.txt
@@ -4,5 +4,5 @@ include_directories(
    ${PyClassifiers_SOURCE_DIR}/lib/json/include
    ${Bayesnet_INCLUDE_DIRS}
 )
-add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc AdaBoostPy.cc PyClassifier.cc PyWrap.cc)
-target_link_libraries(PyClassifiers nlohmann_json::nlohmann_json ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)
+add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc PBC4cip.cc)
+target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)
--- a/pyclfs/PBC4cip.cc
+++ b/pyclfs/PBC4cip.cc
@@ -0,0 +1,8 @@
+#include "PBC4cip.h"
+
+namespace pywrap {
+    PBC4cip::PBC4cip() : PyClassifier("core.PBC4cip", "PBC4cip", true) // presumably (Python module, Python class name, flag), as in the other wrappers - confirm against PyClassifier.h
+    {
+        validHyperparameters = { "random_state" }; // the only hyperparameter name this wrapper declares as valid
+    }
+} /* namespace pywrap */
--- a/pyclfs/PBC4cip.h
+++ b/pyclfs/PBC4cip.h
@@ -0,0 +1,13 @@
+#ifndef PBC4CIP_H
+#define PBC4CIP_H
+#include "PyClassifier.h"
+
+namespace pywrap {
+    class PBC4cip : public PyClassifier { // PyClassifier subclass whose constructor (PBC4cip.cc) passes "core.PBC4cip" / "PBC4cip" to the base class
+    public:
+        PBC4cip(); // defined in PBC4cip.cc; sets validHyperparameters to { "random_state" }
+        ~PBC4cip() = default; // no members of its own to release
+    };
+
+} /* namespace pywrap */
+#endif /* PBC4CIP_H */
--- a/pyclfs/PyClassifier.cc
+++ b/pyclfs/PyClassifier.cc
@@ -93,19 +93,11 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
        }
-        if (xgboost) {
-            long* data = reinterpret_cast<long*>(prediction.get_data());
-            std::vector<int> vPrediction(data, data + prediction.shape(0));
-            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
-            Py_XDECREF(incoming);
-            return resultTensor;
-        } else {
-            int* data = reinterpret_cast<int*>(prediction.get_data());
-            std::vector<int> vPrediction(data, data + prediction.shape(0));
-            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
-            Py_XDECREF(incoming);
-            return resultTensor;
-        }
+        int* data = reinterpret_cast<int*>(prediction.get_data());
+        std::vector<int> vPrediction(data, data + prediction.shape(0));
+        auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
+        Py_XDECREF(incoming);
+        return resultTensor;
    }
    torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
    {
@@ -126,19 +118,11 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
        }
-        if (xgboost) {
-            float* data = reinterpret_cast<float*>(prediction.get_data());
-            std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
-            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
-            Py_XDECREF(incoming);
-            return resultTensor;
-        } else {
-            double* data = reinterpret_cast<double*>(prediction.get_data());
-            std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
-            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
-            Py_XDECREF(incoming);
-            return resultTensor;
-        }
+        double* data = reinterpret_cast<double*>(prediction.get_data());
+        std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
+        auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
+        Py_XDECREF(incoming);
+        return resultTensor;
    }
    float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
    {
@@ -151,4 +135,4 @@ namespace pywrap {
    {
        this->hyperparameters = hyperparameters;
    }
-} /* namespace pywrap */
+} /* namespace pywrap */
--- a/pyclfs/PyClassifier.h
+++ b/pyclfs/PyClassifier.h
@@ -49,7 +49,6 @@ namespace pywrap {
        nlohmann::json hyperparameters;
        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
        std::vector<std::string> notes;
-        bool xgboost = false;
    private:
        PyWrap* pyWrap;
        std::string module;
--- a/pyclfs/XGBoost.cc
+++ b/pyclfs/XGBoost.cc
@@ -5,6 +5,5 @@ namespace pywrap {
    XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
    {
        validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
-        xgboost = true;
    }
 } /* namespace pywrap */
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,13 +2,15 @@ if(ENABLE_TESTING)
    set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers")
    include_directories(
        ${PyClassifiers_SOURCE_DIR}
+        ${PyClassifiers_SOURCE_DIR}/tests/lib/Files # ArffFiles submodule path (see .gitmodules); TestUtils.h includes "ArffFiles.h" by bare name
+        ${PyClassifiers_SOURCE_DIR}/tests/lib/mdlp # mdlp submodule path (see .gitmodules); TestUtils.h includes "CPPFImdlp.h" - confirm the header sits at the submodule root
+        ${PyClassifiers_SOURCE_DIR}/lib/json/include # nlohmann/json stays under lib/
        ${Python3_INCLUDE_DIRS}
        ${TORCH_INCLUDE_DIRS}
-        ${CMAKE_BINARY_DIR}/configured_files/include
        /usr/local/include
    )
    file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
    set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
    add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
-    target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy fimdlp Catch2::Catch2WithMain)
+    target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain)
 endif(ENABLE_TESTING)
--- a/tests/TestPythonClassifiers.cc
+++ b/tests/TestPythonClassifiers.cc
@@ -10,16 +10,15 @@
 #include "pyclfs/SVC.h"
 #include "pyclfs/RandomForest.h"
 #include "pyclfs/XGBoost.h"
+#include "pyclfs/PBC4cip.h" // declares pywrap::PBC4cip, used by the "PBC4cip" test case in this file
-#include "pyclfs/AdaBoostPy.h"
 #include "pyclfs/ODTE.h"
 #include "TestUtils.h"
-#include <iostream>

 TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
 {
    map <pair<std::string, std::string>, float> scores = {
        // Diabetes
-        {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
+        {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.854166687}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
        // Ecoli
        {{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
        // Glass
@@ -35,10 +33,10 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
        {"RandomForest", new pywrap::RandomForest()}
    };
    map<std::string, std::string> versions = {
-        {"ODTE", "1.0.0-1"},
-        {"STree", "1.4.0"},
-        {"SVC", "1.5.2"},
-        {"RandomForest", "1.5.2"}
+        {"ODTE", "1.0.0"},
+        {"STree", "1.3.2"},
+        {"SVC", "1.5.1"},
+        {"RandomForest", "1.5.1"}
    };
    auto clf = models[name];

@@ -60,15 +58,6 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
        REQUIRE(clf->getVersion() == versions[name]);
    }
 }
-TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
-{
-    auto raw = RawDatasets("iris", false);
-    auto clf = pywrap::AdaBoostPy();
-    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
-    clf.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
-    auto score = clf.score(raw.Xt, raw.yt);
-    REQUIRE(score == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
-}
 TEST_CASE("Classifiers features", "[PyClassifiers]")
 {
    auto raw = RawDatasets("iris", false);
@@ -127,30 +116,33 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
    clf.setHyperparameters(hyperparameters);
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
-    std::cout << "XGBoost score: " << score << std::endl;
 }
-TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
+// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
+// {
+//     auto raw = RawDatasets("iris", true);
+//     auto clf = pywrap::XGBoost();
+//     clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
+//     // nlohmann::json hyperparameters = { "n_jobs=1" };
+//     // clf.setHyperparameters(hyperparameters);
+//     auto predict = clf.predict(raw.Xt);
+//     for (int row = 0; row < predict.size(0); row++) {
+//         auto sum = 0.0;
+//         for (int col = 0; col < predict.size(1); col++) {
+//             std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
+//             sum += predict[row][col].item<int>();
+//         }
+//         std::cout << std::endl;
+//         // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
+//     }
+//     std::cout << predict << std::endl;
+// }
+TEST_CASE("PBC4cip", "[PyClassifiers]")
 {
    auto raw = RawDatasets("iris", true);
-    auto clf = pywrap::XGBoost();
+    auto clf = pywrap::PBC4cip();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
-    // nlohmann::json hyperparameters = { "n_jobs=1" };
-    // clf.setHyperparameters(hyperparameters);
-    auto predict_proba = clf.predict_proba(raw.Xt);
-    auto predict = clf.predict(raw.Xt);
-    // std::cout << "Predict proba: " << predict_proba << std::endl;
-    // std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
-    // assert(predict.size(0) == predict_proba.size(0));
-    for (int row = 0; row < predict_proba.size(0); row++) {
-        // auto sum = 0.0;
-        // std::cout << "Row " << std::setw(3) << row << ": ";
-        // for (int col = 0; col < predict_proba.size(1); col++) {
-        //     std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
-        //     sum += predict_proba[row][col].item<double>();
-        // }
-        // std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
-        //     // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
-        REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
-        REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
-    }
+    nlohmann::json hyperparameters = { };
+    clf.setHyperparameters(hyperparameters);
+    auto score = clf.score(raw.Xt, raw.yt);
+    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
 }
--- a/tests/TestUtils.h
+++ b/tests/TestUtils.h
@@ -5,8 +5,8 @@
 #include <vector>
 #include <map>
 #include <tuple>
-#include "ArffFiles/ArffFiles.hpp"
-#include "fimdlp/CPPFImdlp.h"
+#include "ArffFiles.h"
+#include "CPPFImdlp.h"

 bool file_exists(const std::string& name);
 std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
--- a/tests/lib/Files
+++ b/tests/lib/Files
--- a/tests/lib/catch2
+++ b/tests/lib/catch2
--- a/tests/lib/mdlp
+++ b/tests/lib/mdlp
--- a/vcpkg-configuration.json
+++ b/vcpkg-configuration.json
@@ -1,21 +0,0 @@
-{
-  "default-registry": {
-    "kind": "git",
-    "baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
-    "repository": "https://github.com/microsoft/vcpkg"
-  },
-  "registries": [
-    {
-      "kind": "git",
-      "repository": "https://github.com/rmontanana/vcpkg-stash",
-      "baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
-      "packages": [
-        "arff-files",
-        "bayesnet",
-        "fimdlp",
-        "folding",
-        "libtorch-bin"
-      ]
-    }
-  ]
-}
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -1,47 +0,0 @@
-  {
-    "name": "platform",
-    "version-string": "1.1.0",
-    "dependencies": [
-      "arff-files",
-      "nlohmann-json",
-      "fimdlp",
-      "libtorch-bin",
-      "folding",
-      "argparse",
-      "catch2"
-    ],
-    "overrides": [
-      {
-        "name": "arff-files",
-        "version": "1.1.0"
-      },
-      {
-        "name": "fimdlp",
-        "version": "2.0.1"
-      },
-      {
-        "name": "libtorch-bin",
-        "version": "2.7.0"
-      },
-      {
-        "name": "bayesnet",
-        "version": "1.1.1"
-      },
-      {
-        "name": "folding",
-        "version": "1.1.1"
-      },
-      {
-        "name": "argpase",
-        "version": "3.2"
-      },
-      {
-        "name": "catch2",
-        "version": "3.8.1"
-      },
-      {
-        "name": "nlohmann-json",
-        "version": "3.11.3"
-      }
-    ]
-  }