Move tests library to tests/lib

Add PBC4cip classifier
2025-01-09 10:04:16 +01:00
20 changed files with 96 additions and 187 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,13 @@
 [submodule "lib/json"]
 	path = lib/json
 	url = https://github.com/nlohmann/json.git
 [submodule "lib/catch2"]
 	path = tests/lib/catch2
 	url = https://github.com/catchorg/Catch2.git
 [submodule "lib/mdlp"]
 	path = tests/lib/mdlp
 	url = https://github.com/rmontanana/mdlp
 [submodule "tests/lib/Files"]
 	path = tests/lib/Files
 	url = https://github.com/rmontanana/ArffFiles
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,8 +45,6 @@ endif()
 find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
 message("Python3_LIBRARIES=${Python3_LIBRARIES}")
 find_package(nlohmann_json CONFIG REQUIRED)
 # CMakes modules
 # --------------
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -66,10 +64,9 @@ endif (ENABLE_CLANG_TIDY)
 # External libraries - dependencies of PyClassifiers
 # --------------------------------------------------
-find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
+find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
-find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ../lib/include)
+find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${PyClassifiers_SOURCE_DIR}/../lib/include)
-
+message(STATUS "BayesNet=${BayesNet}")
 message(STATUS "BayesNet=${bayesnet}")
 message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
@@ -81,8 +78,9 @@ add_subdirectory(pyclfs)
 # -------
 if (ENABLE_TESTING)
  MESSAGE("Testing enabled")
-  find_package(Catch2 CONFIG REQUIRED)
+  add_git_submodule(tests/lib/catch2)
-  find_package(arff-files CONFIG REQUIRED)
+  add_git_submodule(tests/lib/mdlp)
  add_subdirectory(tests/lib/Files)
  include(CTest)
  add_subdirectory(tests)
 endif (ENABLE_TESTING)
--- a/4
+++ b/4
@@ -52,14 +52,14 @@ debug: ## Build a debug version of the project
 	@echo ">>> Building Debug PyClassifiers...";
 	@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
 	@mkdir $(f_debug); 
-	@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
+	@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
 	@echo ">>> Done";
 release: ## Build a Release version of the project
 	@echo ">>> Building Release PyClassifiers...";
 	@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
 	@mkdir $(f_release); 
-	@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
+	@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
 	@echo ">>> Done";	
 opt = ""
--- a/lib/json
+++ b/lib/json
--- a/pyclfs/AdaBoostPy.cc
+++ b/pyclfs/AdaBoostPy.cc
@@ -1,20 +0,0 @@
 #include "AdaBoostPy.h"
 // Implementation of the AdaBoostPy wrapper declared in AdaBoostPy.h.
 namespace pywrap {
    // Forwards "sklearn.ensemble" / "AdaBoostClassifier" to the PyClassifier base
    // (presumably Python module name and class name, in that order — confirm in PyClassifier).
    // NOTE(review): the meaning of the trailing `true` is defined by PyClassifier and is not visible here.
    AdaBoostPy::AdaBoostPy() : PyClassifier("sklearn.ensemble", "AdaBoostClassifier", true)
    {
        // Hyperparameter names this wrapper declares as valid.
        // NOTE(review): scikit-learn's AdaBoostClassifier does not document an `n_jobs`
        // parameter — confirm that accepting it here is intended.
        validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
    }
    // Returns callMethodSumOfItems("get_n_leaves").
    // NOTE(review): "edges" is reported from the Python-side `get_n_leaves`; presumably a sum
    // over the ensemble's estimators — verify against callMethodSumOfItems.
    int AdaBoostPy::getNumberOfEdges() const
    {
        return callMethodSumOfItems("get_n_leaves");
    }
    // Returns callMethodSumOfItems("get_depth"); "states" is reported from the Python-side `get_depth`.
    int AdaBoostPy::getNumberOfStates() const
    {
        return callMethodSumOfItems("get_depth");
    }
    // Returns callMethodSumOfItems("node_count").
    // NOTE(review): unlike the two names above, `node_count` is not a `get_*` name; confirm
    // that callMethodSumOfItems can resolve it.
    int AdaBoostPy::getNumberOfNodes() const
    {
        return callMethodSumOfItems("node_count");
    }
 } /* namespace pywrap */
--- a/pyclfs/AdaBoostPy.h
+++ b/pyclfs/AdaBoostPy.h
@@ -1,15 +0,0 @@
 #ifndef ADABOOSTPY_H
 #define ADABOOSTPY_H
 #include "PyClassifier.h"
 namespace pywrap {
    // PyClassifier specialisation for an AdaBoost classifier.
    // Overrides the three structure-size queries inherited from the base class.
    class AdaBoostPy : public PyClassifier {
    public:
        // Default constructor; takes no arguments.
        AdaBoostPy();
        ~AdaBoostPy() = default;
        // Structure-size queries; each returns an int and overrides a virtual of the base class.
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        int getNumberOfNodes() const override;
    };
 } /* namespace pywrap */
 #endif /* ADABOOSTPY_H */
--- a/pyclfs/CMakeLists.txt
+++ b/pyclfs/CMakeLists.txt
@@ -4,5 +4,5 @@ include_directories(
    ${PyClassifiers_SOURCE_DIR}/lib/json/include
    ${Bayesnet_INCLUDE_DIRS}
 )
-add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc AdaBoostPy.cc PyClassifier.cc PyWrap.cc)
+add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc PBC4cip.cc)
-target_link_libraries(PyClassifiers nlohmann_json::nlohmann_json ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)
+target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)
--- a/pyclfs/PBC4cip.cc
+++ b/pyclfs/PBC4cip.cc
@@ -0,0 +1,8 @@
 #include "PBC4cip.h"
 // Implementation of the PBC4cip wrapper declared in PBC4cip.h.
 namespace pywrap {
    // Forwards "core.PBC4cip" / "PBC4cip" to the PyClassifier base
    // (presumably Python module name and class name, in that order — confirm in PyClassifier).
    // NOTE(review): the meaning of the trailing `true` is defined by PyClassifier and is not visible here.
    PBC4cip::PBC4cip() : PyClassifier("core.PBC4cip", "PBC4cip", true)
    {
        // Only "random_state" is declared as a valid hyperparameter name.
        validHyperparameters = { "random_state" };
    }
 } /* namespace pywrap */
--- a/pyclfs/PBC4cip.h
+++ b/pyclfs/PBC4cip.h
@@ -0,0 +1,13 @@
 #ifndef PBC4CIP_H
 #define PBC4CIP_H
 #include "PyClassifier.h"
 namespace pywrap {
    // PyClassifier specialisation for the PBC4cip classifier.
    // Declares only a constructor and destructor; everything else is inherited from PyClassifier.
    class PBC4cip : public PyClassifier {
    public:
        // Default constructor; takes no arguments.
        PBC4cip();
        ~PBC4cip() = default;
    };
 } /* namespace pywrap */
 #endif /* PBC4CIP_H */
--- a/pyclfs/PyClassifier.cc
+++ b/pyclfs/PyClassifier.cc
@@ -93,19 +93,11 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
        }
-        if (xgboost) {
+        int* data = reinterpret_cast<int*>(prediction.get_data());
-            long* data = reinterpret_cast<long*>(prediction.get_data());
+        std::vector<int> vPrediction(data, data + prediction.shape(0));
-            std::vector<int> vPrediction(data, data + prediction.shape(0));
+        auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
-            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
+        Py_XDECREF(incoming);
-            Py_XDECREF(incoming);
+        return resultTensor;
            return resultTensor;
        } else {
            int* data = reinterpret_cast<int*>(prediction.get_data());
            std::vector<int> vPrediction(data, data + prediction.shape(0));
            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
            Py_XDECREF(incoming);
            return resultTensor;
        }
    }
    torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
    {
@@ -126,19 +118,11 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
        }
-        if (xgboost) {
+        double* data = reinterpret_cast<double*>(prediction.get_data());
-            float* data = reinterpret_cast<float*>(prediction.get_data());
+        std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
-            std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
+        auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
-            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
+        Py_XDECREF(incoming);
-            Py_XDECREF(incoming);
+        return resultTensor;
            return resultTensor;
        } else {
            double* data = reinterpret_cast<double*>(prediction.get_data());
            std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
            Py_XDECREF(incoming);
            return resultTensor;
        }
    }
    float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
    {
--- a/pyclfs/PyClassifier.h
+++ b/pyclfs/PyClassifier.h
@@ -49,7 +49,6 @@ namespace pywrap {
        nlohmann::json hyperparameters;
        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
        std::vector<std::string> notes;
        bool xgboost = false;
    private:
        PyWrap* pyWrap;
        std::string module;
--- a/pyclfs/XGBoost.cc
+++ b/pyclfs/XGBoost.cc
@@ -5,6 +5,5 @@ namespace pywrap {
    // Forwards "xgboost" / "XGBClassifier" to the PyClassifier base
    // (presumably Python module name and class name, in that order — confirm in PyClassifier).
    XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
    {
        // Hyperparameter names this wrapper declares as valid.
        validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
        // Sets the `xgboost` member flag.
        // NOTE(review): this assignment only compiles while PyClassifier still declares `xgboost` — confirm.
        xgboost = true;
    }
 } /* namespace pywrap */
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,13 +2,15 @@ if(ENABLE_TESTING)
    # Name of the unit-test executable target.
    set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers")
    # Directory-scoped include paths: they apply to every target created in this directory.
    # NOTE(review): the Files and mdlp entries point under lib/, whereas .gitmodules in this
    # change places those submodules under tests/lib/ — confirm the paths.
    include_directories(
        ${PyClassifiers_SOURCE_DIR}
        ${PyClassifiers_SOURCE_DIR}/lib/Files
        ${PyClassifiers_SOURCE_DIR}/lib/mdlp
        ${PyClassifiers_SOURCE_DIR}/lib/json/include
        ${Python3_INCLUDE_DIRS}
        ${TORCH_INCLUDE_DIRS}
        ${CMAKE_BINARY_DIR}/configured_files/include
        # NOTE(review): hard-coded absolute path; not portable across machines.
        /usr/local/include
    )
    # Collect every .cc under pyclfs/ so the sources are compiled directly into the test binary.
    # The glob runs at configure time only (no CONFIGURE_DEPENDS), so a newly added source
    # file is picked up only after CMake is re-run.
    file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
    set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
    add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
    # Link line: the two variants below differ only in the discretizer/ARFF libraries
    # (fimdlp versus ArffFiles + mdlp).
-    target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy fimdlp Catch2::Catch2WithMain)
+    target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain)
 endif(ENABLE_TESTING)
--- a/tests/TestPythonClassifiers.cc
+++ b/tests/TestPythonClassifiers.cc
@@ -10,16 +10,14 @@
 #include "pyclfs/SVC.h"
 #include "pyclfs/RandomForest.h"
 #include "pyclfs/XGBoost.h"
 #include "pyclfs/AdaBoostPy.h"
 #include "pyclfs/ODTE.h"
 #include "TestUtils.h"
 #include <iostream>
 TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
 {
    map <pair<std::string, std::string>, float> scores = {
        // Diabetes
-        {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
+        {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.854166687}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
        // Ecoli
        {{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
        // Glass
@@ -35,10 +33,10 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
        {"RandomForest", new pywrap::RandomForest()}
    };
    map<std::string, std::string> versions = {
-        {"ODTE", "1.0.0-1"},
+        {"ODTE", "1.0.0"},
-        {"STree", "1.4.0"},
+        {"STree", "1.3.2"},
-        {"SVC", "1.5.2"},
+        {"SVC", "1.5.1"},
-        {"RandomForest", "1.5.2"}
+        {"RandomForest", "1.5.1"}
    };
    auto clf = models[name];
@@ -60,15 +58,6 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
        REQUIRE(clf->getVersion() == versions[name]);
    }
 }
 // Fits AdaBoostPy on the "iris" data returned by RawDatasets and checks its score.
 TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
 {
    // NOTE(review): the meaning of the second RawDatasets argument (false) is defined in TestUtils — not visible here.
    auto raw = RawDatasets("iris", false);
    auto clf = pywrap::AdaBoostPy();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    // NOTE(review): hyperparameters are set after fit(); whether they can still influence
    // the score below depends on PyClassifier — confirm the intended call order.
    clf.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
    // Scored on the same tensors that were passed to fit().
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
 }
 TEST_CASE("Classifiers features", "[PyClassifiers]")
 {
    auto raw = RawDatasets("iris", false);
@@ -127,30 +116,33 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
    clf.setHyperparameters(hyperparameters);
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
    std::cout << "XGBoost score: " << score << std::endl;
 }
-TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
+// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
 // {
 //     auto raw = RawDatasets("iris", true);
 //     auto clf = pywrap::XGBoost();
 //     clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
 //     // nlohmann::json hyperparameters = { "n_jobs=1" };
 //     // clf.setHyperparameters(hyperparameters);
 //     auto predict = clf.predict(raw.Xt);
 //     for (int row = 0; row < predict.size(0); row++) {
 //         auto sum = 0.0;
 //         for (int col = 0; col < predict.size(1); col++) {
 //             std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
 //             sum += predict[row][col].item<int>();
 //         }
 //         std::cout << std::endl;
 //         // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
 //     }
 //     std::cout << predict << std::endl;
 // }
 TEST_CASE("PBC4cip", "[PyClassifiers]")
 {
    auto raw = RawDatasets("iris", true);
-    auto clf = pywrap::XGBoost();
+    auto clf = pywrap::PBC4cip();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
-    // nlohmann::json hyperparameters = { "n_jobs=1" };
+    nlohmann::json hyperparameters = { };
-    // clf.setHyperparameters(hyperparameters);
+    clf.setHyperparameters(hyperparameters);
-    auto predict_proba = clf.predict_proba(raw.Xt);
+    auto score = clf.score(raw.Xt, raw.yt);
-    auto predict = clf.predict(raw.Xt);
+    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
    // std::cout << "Predict proba: " << predict_proba << std::endl;
    // std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
    // assert(predict.size(0) == predict_proba.size(0));
    for (int row = 0; row < predict_proba.size(0); row++) {
        // auto sum = 0.0;
        // std::cout << "Row " << std::setw(3) << row << ": ";
        // for (int col = 0; col < predict_proba.size(1); col++) {
        //     std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
        //     sum += predict_proba[row][col].item<double>();
        // }
        // std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
        //     // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
        REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
        REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
    }
 }
--- a/tests/TestUtils.h
+++ b/tests/TestUtils.h
@@ -5,8 +5,8 @@
 #include <vector>
 #include <map>
 #include <tuple>
-#include "ArffFiles/ArffFiles.hpp"
+#include "ArffFiles.h"
-#include "fimdlp/CPPFImdlp.h"
+#include "CPPFImdlp.h"
 bool file_exists(const std::string& name);
 std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
--- a/tests/lib/Files
+++ b/tests/lib/Files
--- a/tests/lib/catch2
+++ b/tests/lib/catch2
--- a/tests/lib/mdlp
+++ b/tests/lib/mdlp
--- a/vcpkg-configuration.json
+++ b/vcpkg-configuration.json
@@ -1,21 +0,0 @@
 {
  "default-registry": {
    "kind": "git",
    "baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
    "repository": "https://github.com/microsoft/vcpkg"
  },
  "registries": [
    {
      "kind": "git",
      "repository": "https://github.com/rmontanana/vcpkg-stash",
      "baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
      "packages": [
        "arff-files",
        "bayesnet",
        "fimdlp",
        "folding",
        "libtorch-bin"
      ]
    }
  ]
 }
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -1,47 +0,0 @@
  {
    "name": "platform",
    "version-string": "1.1.0",
    "dependencies": [
      "arff-files",
      "nlohmann-json",
      "fimdlp",
      "libtorch-bin",
      "folding",
      "argparse",
      "catch2"
    ],
    "overrides": [
      {
        "name": "arff-files",
        "version": "1.1.0"
      },
      {
        "name": "fimdlp",
        "version": "2.0.1"
      },
      {
        "name": "libtorch-bin",
        "version": "2.7.0"
      },
      {
        "name": "bayesnet",
        "version": "1.1.1"
      },
      {
        "name": "folding",
        "version": "1.1.1"
      },
       {
         "name": "argparse",
         "version": "3.2"
      },
      {
        "name": "catch2",
        "version": "3.8.1"
      },
      {
        "name": "nlohmann-json",
        "version": "3.11.3"
      }
    ]
  }