diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6a83342..b031b9e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,5 +39,5 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} run: | - sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ + sonar-scanner --define sonar.cfamily.compile-commands="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ --define sonar.coverageReportPaths=build/coverage.xml \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..b3c7ebd --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/lib/Files"] + path = tests/lib/Files + url = https://github.com/rmontanana/ArffFiles.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bbdb1a..5559336 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,20 +1,34 @@ cmake_minimum_required(VERSION 3.20) + project(mdlp) set(CMAKE_CXX_STANDARD 17) +cmake_policy(SET CMP0135 NEW) + find_package(Torch REQUIRED) -include_directories(${TORCH_INCLUDE_DIRS}) -add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp) -target_link_libraries(mdlp "${TORCH_LIBRARIES}") -add_subdirectory(sample) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-elide-constructors") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline") endif() + if (ENABLE_TESTING) MESSAGE("Debug mode") enable_testing() set(CODE_COVERAGE ON) SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage") add_subdirectory(tests) -endif(ENABLE_TESTING) \ No newline at end of file +else(ENABLE_TESTING) + MESSAGE("Release mode") +endif(ENABLE_TESTING) + + +add_subdirectory(sample) + +include_directories( + ${TORCH_INCLUDE_DIRS} + ${mdlp_SOURCE_DIR}/src +) + +add_library(mdlp src/CPPFImdlp.cpp 
src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp) +target_link_libraries(mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 722f601..7eb8efb 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -2,5 +2,10 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_BUILD_TYPE Debug) -add_executable(sample sample.cpp ../tests/ArffFiles.cpp) +include_directories( + ${mdlp_SOURCE_DIR}/src + ${mdlp_SOURCE_DIR}/tests/lib/Files +) + +add_executable(sample sample.cpp ) target_link_libraries(sample mdlp "${TORCH_LIBRARIES}") diff --git a/sample/sample.cpp b/sample/sample.cpp index b5604ff..465d92e 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -12,10 +12,10 @@ #include #include #include -#include "../Discretizer.h" -#include "../CPPFImdlp.h" -#include "../BinDisc.h" -#include "../tests/ArffFiles.h" +#include +#include "Discretizer.h" +#include "CPPFImdlp.h" +#include "BinDisc.h" const string PATH = "tests/datasets/"; diff --git a/BinDisc.cpp b/src/BinDisc.cpp similarity index 91% rename from BinDisc.cpp rename to src/BinDisc.cpp index d29a835..4f13b09 100644 --- a/BinDisc.cpp +++ b/src/BinDisc.cpp @@ -65,12 +65,12 @@ namespace mdlp { bool first = true; results.reserve(percentiles.size()); for (auto percentile : percentiles) { - const auto i = static_cast(std::floor(static_cast(data.size() - 1) * percentile / 100.)); + const auto i = static_cast(std::floor(static_cast(data.size() - 1) * percentile / 100.)); const auto indexLower = clip(i, 0, data.size() - 2); - const double percentI = static_cast(indexLower) / static_cast(data.size() - 1); - const double fraction = + const precision_t percentI = static_cast(indexLower) / static_cast(data.size() - 1); + const precision_t fraction = (percentile / 100.0 - percentI) / - (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); + (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); if (const auto value = data[indexLower] 
+ (data[indexLower + 1] - data[indexLower]) * fraction; value != results.back() || first) // first needed as results.back() return is undefined for empty vectors results.push_back(value); first = false; diff --git a/BinDisc.h b/src/BinDisc.h similarity index 100% rename from BinDisc.h rename to src/BinDisc.h diff --git a/CPPFImdlp.cpp b/src/CPPFImdlp.cpp similarity index 100% rename from CPPFImdlp.cpp rename to src/CPPFImdlp.cpp diff --git a/CPPFImdlp.h b/src/CPPFImdlp.h similarity index 100% rename from CPPFImdlp.h rename to src/CPPFImdlp.h diff --git a/Discretizer.cpp b/src/Discretizer.cpp similarity index 92% rename from Discretizer.cpp rename to src/Discretizer.cpp index 5acd45e..1522fb2 100644 --- a/Discretizer.cpp +++ b/src/Discretizer.cpp @@ -20,7 +20,7 @@ namespace mdlp { for (const precision_t& item : data) { auto pos = bound(first, last, item); auto number = pos - first; - discretizedData.push_back(number); + discretizedData.push_back(static_cast(number)); } return discretizedData; } @@ -41,7 +41,7 @@ namespace mdlp { auto num_elements = X_.numel(); samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); auto result = transform(X); - return torch::tensor(result, torch::kInt32); + return torch::tensor(result, torch_label_t); } torch::Tensor Discretizer::fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_) { @@ -49,6 +49,6 @@ namespace mdlp { samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); labels_t y(y_.data_ptr(), y_.data_ptr() + num_elements); auto result = fit_transform(X, y); - return torch::tensor(result, torch::kInt32); + return torch::tensor(result, torch_label_t); } } \ No newline at end of file diff --git a/Discretizer.h b/src/Discretizer.h similarity index 96% rename from Discretizer.h rename to src/Discretizer.h index b8069dc..88db52a 100644 --- a/Discretizer.h +++ b/src/Discretizer.h @@ -9,14 +9,15 @@ #include #include -#include #include "typesFImdlp.h" +#include namespace mdlp { enum class bound_dir_t { LEFT, RIGHT }; + 
const auto torch_label_t = torch::kInt32; class Discretizer { public: Discretizer() = default; diff --git a/Metrics.cpp b/src/Metrics.cpp similarity index 100% rename from Metrics.cpp rename to src/Metrics.cpp diff --git a/Metrics.h b/src/Metrics.h similarity index 100% rename from Metrics.h rename to src/Metrics.h diff --git a/typesFImdlp.h b/src/typesFImdlp.h similarity index 86% rename from typesFImdlp.h rename to src/typesFImdlp.h index a0b6d54..0b5c5c7 100644 --- a/typesFImdlp.h +++ b/src/typesFImdlp.h @@ -8,8 +8,9 @@ using namespace std; namespace mdlp { typedef float precision_t; + typedef int label_t; typedef std::vector samples_t; - typedef std::vector labels_t; + typedef std::vector labels_t; typedef std::vector indices_t; typedef std::vector cutPoints_t; typedef std::map, precision_t> cacheEnt_t; diff --git a/tests/ArffFiles.cpp b/tests/ArffFiles.cpp deleted file mode 100644 index 3b88117..0000000 --- a/tests/ArffFiles.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// **************************************************************** -// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX - FileType: SOURCE -// SPDX - License - Identifier: MIT -// **************************************************************** - -#include "ArffFiles.h" -#include -#include -#include - -using namespace std; - -ArffFiles::ArffFiles() = default; - -vector ArffFiles::getLines() const -{ - return lines; -} - -unsigned long int ArffFiles::getSize() const -{ - return lines.size(); -} - -vector> ArffFiles::getAttributes() const -{ - return attributes; -} - -string ArffFiles::getClassName() const -{ - return className; -} - -string ArffFiles::getClassType() const -{ - return classType; -} - -vector& ArffFiles::getX() -{ - return X; -} - -vector& ArffFiles::getY() -{ - return y; -} - -void ArffFiles::load(const string& fileName, bool classLast) -{ - ifstream file(fileName); - if (!file.is_open()) { - throw invalid_argument("Unable to open file"); - } - string line; - 
string keyword; - string attribute; - string type; - string type_w; - while (getline(file, line)) { - if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { - continue; - } - if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { - stringstream ss(line); - ss >> keyword >> attribute; - type = ""; - while (ss >> type_w) - type += type_w + " "; - attributes.emplace_back(trim(attribute), trim(type)); - continue; - } - if (line[0] == '@') { - continue; - } - lines.push_back(line); - } - file.close(); - if (attributes.empty()) - throw invalid_argument("No attributes found"); - if (classLast) { - className = get<0>(attributes.back()); - classType = get<1>(attributes.back()); - attributes.pop_back(); - } else { - className = get<0>(attributes.front()); - classType = get<1>(attributes.front()); - attributes.erase(attributes.begin()); - } - generateDataset(classLast); - -} - -void ArffFiles::generateDataset(bool classLast) -{ - X = vector(attributes.size(), mdlp::samples_t(lines.size())); - auto yy = vector(lines.size(), ""); - int labelIndex = classLast ? 
static_cast(attributes.size()) : 0; - for (size_t i = 0; i < lines.size(); i++) { - stringstream ss(lines[i]); - string value; - int pos = 0; - int xIndex = 0; - while (getline(ss, value, ',')) { - if (pos++ == labelIndex) { - yy[i] = value; - } else { - X[xIndex++][i] = stof(value); - } - } - } - y = factorize(yy); -} - -string ArffFiles::trim(const string& source) -{ - string s(source); - s.erase(0, s.find_first_not_of(" '\n\r\t")); - s.erase(s.find_last_not_of(" '\n\r\t") + 1); - return s; -} - -vector ArffFiles::factorize(const vector& labels_t) -{ - vector yy; - yy.reserve(labels_t.size()); - map labelMap; - int i = 0; - for (const string& label : labels_t) { - if (labelMap.find(label) == labelMap.end()) { - labelMap[label] = i++; - } - yy.push_back(labelMap[label]); - } - return yy; -} \ No newline at end of file diff --git a/tests/ArffFiles.h b/tests/ArffFiles.h deleted file mode 100644 index 985dce0..0000000 --- a/tests/ArffFiles.h +++ /dev/null @@ -1,41 +0,0 @@ -// **************************************************************** -// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX - FileType: SOURCE -// SPDX - License - Identifier: MIT -// **************************************************************** - -#ifndef ARFFFILES_H -#define ARFFFILES_H - -#include -#include -#include "../typesFImdlp.h" - -using namespace std; - -class ArffFiles { -private: - vector lines; - vector> attributes; - string className; - string classType; - vector X; - vector y; - - void generateDataset(bool); - -public: - ArffFiles(); - void load(const string&, bool = true); - vector getLines() const; - unsigned long int getSize() const; - string getClassName() const; - string getClassType() const; - static string trim(const string&); - vector& getX(); - vector& getY(); - vector> getAttributes() const; - static vector factorize(const vector& labels_t); -}; - -#endif \ No newline at end of file diff --git a/tests/BinDisc_unittest.cpp 
b/tests/BinDisc_unittest.cpp index db0e051..e19c727 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -8,8 +8,8 @@ #include #include #include "gtest/gtest.h" -#include "ArffFiles.h" -#include "../BinDisc.h" +#include +#include "BinDisc.h" #include "Experiments.hpp" namespace mdlp { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 64366f2..ac4723a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,3 @@ -cmake_minimum_required(VERSION 3.20) -set(CMAKE_CXX_STANDARD 17) -cmake_policy(SET CMP0135 NEW) include(FetchContent) include_directories(${GTEST_INCLUDE_DIRS}) FetchContent_Declare( @@ -11,28 +8,30 @@ FetchContent_Declare( set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) -find_package(Torch REQUIRED) +include_directories( + ${TORCH_INCLUDE_DIRS} + ${mdlp_SOURCE_DIR}/src + ${mdlp_SOURCE_DIR}/tests/lib/Files +) -enable_testing() - -include_directories(${TORCH_INCLUDE_DIRS}) - -add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp) +add_executable(Metrics_unittest ${mdlp_SOURCE_DIR}/src/Metrics.cpp Metrics_unittest.cpp) target_link_libraries(Metrics_unittest GTest::gtest_main) target_compile_options(Metrics_unittest PRIVATE --coverage) target_link_options(Metrics_unittest PRIVATE --coverage) -add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp ../Discretizer.cpp) +add_executable(FImdlp_unittest FImdlp_unittest.cpp +${mdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${mdlp_SOURCE_DIR}/src/Metrics.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp) target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}") target_compile_options(FImdlp_unittest PRIVATE --coverage) target_link_options(FImdlp_unittest PRIVATE --coverage) -add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp ../Discretizer.cpp) +add_executable(BinDisc_unittest BinDisc_unittest.cpp ${mdlp_SOURCE_DIR}/src/BinDisc.cpp 
${mdlp_SOURCE_DIR}/src/Discretizer.cpp) target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}") target_compile_options(BinDisc_unittest PRIVATE --coverage) target_link_options(BinDisc_unittest PRIVATE --coverage) -add_executable(Discretizer_unittest ../BinDisc.cpp ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp ../Discretizer.cpp Discretizer_unittest.cpp) +add_executable(Discretizer_unittest Discretizer_unittest.cpp +${mdlp_SOURCE_DIR}/src/BinDisc.cpp ${mdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${mdlp_SOURCE_DIR}/src/Metrics.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp ) target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}") target_compile_options(Discretizer_unittest PRIVATE --coverage) target_link_options(Discretizer_unittest PRIVATE --coverage) diff --git a/tests/Discretizer_unittest.cpp b/tests/Discretizer_unittest.cpp index d8a7f7f..70baada 100644 --- a/tests/Discretizer_unittest.cpp +++ b/tests/Discretizer_unittest.cpp @@ -7,11 +7,11 @@ #include #include #include +#include #include "gtest/gtest.h" -#include "ArffFiles.h" -#include "../Discretizer.h" -#include "../BinDisc.h" -#include "../CPPFImdlp.h" +#include "Discretizer.h" +#include "BinDisc.h" +#include "CPPFImdlp.h" namespace mdlp { const float margin = 1e-4; diff --git a/tests/Experiments.hpp b/tests/Experiments.hpp index d122529..40fd6e8 100644 --- a/tests/Experiments.hpp +++ b/tests/Experiments.hpp @@ -12,7 +12,7 @@ #include #include #include -#include "../typesFImdlp.h" +#include "typesFImdlp.h" template void show_vector(const std::vector& data, std::string title) diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index 38fe830..26ae424 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -4,12 +4,12 @@ // SPDX - License - Identifier: MIT // **************************************************************** -#include "gtest/gtest.h" -#include "../Metrics.h" -#include "../CPPFImdlp.h" #include #include -#include "ArffFiles.h" 
+#include +#include "gtest/gtest.h" +#include "Metrics.h" +#include "CPPFImdlp.h" #define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \ try { \ diff --git a/tests/Metrics_unittest.cpp b/tests/Metrics_unittest.cpp index 990321b..40389fc 100644 --- a/tests/Metrics_unittest.cpp +++ b/tests/Metrics_unittest.cpp @@ -5,7 +5,7 @@ // **************************************************************** #include "gtest/gtest.h" -#include "../Metrics.h" +#include "Metrics.h" namespace mdlp { class TestMetrics : public Metrics, public testing::Test { diff --git a/tests/lib/Files b/tests/lib/Files new file mode 160000 index 0000000..a531692 --- /dev/null +++ b/tests/lib/Files @@ -0,0 +1 @@ +Subproject commit a5316928d408266aa425f64131ab0f592b010a8d diff --git a/tests/tests_generate.ipynb b/tests/tests_generate.ipynb index d9678fd..247914e 100644 --- a/tests/tests_generate.ipynb +++ b/tests/tests_generate.ipynb @@ -201,7 +201,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.1.undefined" } }, "nbformat": 4,