Update README

Fix Error in percentile method
Update build.yml
2025-08-21 02:15:57 +00:00 · 2024-06-24 11:47:03 +02:00 · 2024-06-24 10:55:26 +02:00 · 2024-06-14 22:04:29 +02:00 · 2024-06-14 22:01:11 +02:00 · 2024-06-14 21:17:30 +02:00
11 changed files with 46 additions and 22 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -28,7 +28,7 @@ jobs:
          unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
      - name: Tests & build-wrapper
        run: |
-          cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch
+          cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DENABLE_TESTING=ON
          build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
          cd build
          make
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,8 @@
 **/build
 build_Debug
 build_Release
+build_debug
+build_release
 **/lcoverage
 .idea
 cmake-*
--- a/BinDisc.cpp
+++ b/BinDisc.cpp
@@ -58,7 +58,7 @@ namespace mdlp {
        results.reserve(percentiles.size());
        for (auto percentile : percentiles) {
            const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
-            const auto indexLower = clip(i, 0, data.size() - 1);
+            const auto indexLower = clip(i, 0, data.size() - 2);
            const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
            const double fraction =
                (percentile / 100.0 - percentI) /
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,4 +6,6 @@ include_directories(${TORCH_INCLUDE_DIRS})
 add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp)
 target_link_libraries(mdlp "${TORCH_LIBRARIES}")
 add_subdirectory(sample)
+if (ENABLE_TESTING)
    add_subdirectory(tests)
+endif(ENABLE_TESTING)
--- a/Discretizer.cpp
+++ b/Discretizer.cpp
@@ -20,7 +20,7 @@ namespace mdlp {
    {
        auto num_elements = X_.numel();
        samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
-        labels_t y(y_.data_ptr<int64_t>(), y_.data_ptr<int64_t>() + num_elements);
+        labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
        fit(X, y);
    }
    torch::Tensor Discretizer::transform_t(torch::Tensor& X_)
@@ -28,14 +28,14 @@ namespace mdlp {
        auto num_elements = X_.numel();
        samples_t X(X_.data_ptr<float>(), X_.data_ptr<float>() + num_elements);
        auto result = transform(X);
-        return torch::tensor(result, torch::kInt64);
+        return torch::tensor(result, torch::kInt32);
    }
    torch::Tensor Discretizer::fit_transform_t(torch::Tensor& X_, torch::Tensor& y_)
    {
        auto num_elements = X_.numel();
        samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
-        labels_t y(y_.data_ptr<int64_t>(), y_.data_ptr<int64_t>() + num_elements);
+        labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
        auto result = fit_transform(X, y);
-        return torch::tensor(result, torch::kInt64);
+        return torch::tensor(result, torch::kInt32);
    }
 }
--- a/Discretizer.h
+++ b/Discretizer.h
@@ -18,7 +18,7 @@ namespace mdlp {
        void fit_t(torch::Tensor& X_, torch::Tensor& y_);
        torch::Tensor transform_t(torch::Tensor& X_);
        torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
-        static inline std::string version() { return "1.2.1"; };
+        static inline std::string version() { return "1.2.2"; };
    protected:
        labels_t discretizedData = labels_t();
        cutPoints_t cutPoints;
--- a/Makefile
+++ b/Makefile
@@ -0,0 +1,13 @@
+SHELL := /bin/bash
+.DEFAULT_GOAL := build
+.PHONY: build test
+
+build: 
+	@if [ -d build_release ]; then rm -fr build_release; fi
+	@mkdir build_release
+	@cmake -B build_release -S . -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF
+	@cmake --build build_release
+
+test:
+	@echo "Testing..."
+	@cd tests && ./test
--- a/README.md
+++ b/README.md
@@ -14,9 +14,17 @@ The implementation tries to mitigate the problem of different label values with
 Other features:

 - Intervals with the same value of the variable are not taken into account for cutpoints.
-- Intervals have to have more than two examples to be evaluated.
+- Intervals have to have more than two examples to be evaluated (mdlp).

-The algorithm returns the cut points for the variable.
+- The algorithm returns the cut points for the variable.
+
+- The transform method uses the cut points returning its index in the following way:
+
+        cut[i - 1] <= x < cut[i]
+
+    using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) method
+
+- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.

 ## Sample

@@ -34,6 +42,5 @@ build/sample/sample -h
 To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:

 ```bash
-cd tests
-./test
+make test
 ```
--- a/sample/sample.cpp
+++ b/sample/sample.cpp
@@ -139,12 +139,12 @@ void process_file(const string& path, const string& file_name, bool class_last,
        std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
    }
    auto Xt = torch::tensor(X[0], torch::kFloat32);
-    auto yt = torch::tensor(y, torch::kInt64);
+    auto yt = torch::tensor(y, torch::kInt32);
    //test.fit_t(Xt, yt);
    auto result = test.fit_transform_t(Xt, yt);
    std::cout << "Transformed data (torch)...: " << std::endl;
    for (int i = 130; i < 135; i++) {
-        std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << result[i].item<int64_t>() << std::endl;
+        std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << result[i].item<int>() << std::endl;
    }
    auto disc = mdlp::BinDisc(3);
    auto res_v = disc.fit_transform(X[0], y);
@@ -152,7 +152,7 @@ void process_file(const string& path, const string& file_name, bool class_last,
    auto res_t = disc.transform_t(Xt);
    std::cout << "Transformed data (BinDisc)...: " << std::endl;
    for (int i = 130; i < 135; i++) {
-        std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << res_v[i] << " " << res_t[i].item<int64_t>() << std::endl;
+        std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << res_v[i] << " " << res_t[i].item<int>() << std::endl;
    }
 }

--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -3,7 +3,7 @@ sonar.organization=rmontanana

 # This is the name and version displayed in the SonarCloud UI.
 sonar.projectName=mdlp
-sonar.projectVersion=1.1.3
+sonar.projectVersion=1.2.1
 # sonar.test.exclusions=tests/**
 # sonar.tests=tests/
 # sonar.coverage.exclusions=tests/**,sample/**
--- a/tests/BinDisc_unittest.cpp
+++ b/tests/BinDisc_unittest.cpp
@@ -335,10 +335,10 @@ namespace mdlp {
        auto Xtt = fit_transform(X[0], file.getY());
        EXPECT_EQ(expected, Xtt);
        auto Xt_t = torch::tensor(X[0], torch::kFloat32);
-        auto y_t = torch::tensor(file.getY(), torch::kInt64);
+        auto y_t = torch::tensor(file.getY(), torch::kInt32);
        auto Xtt_t = fit_transform_t(Xt_t, y_t);
        for (int i = 0; i < expected.size(); i++)
-            EXPECT_EQ(expected[i], Xtt_t[i].item<int64_t>());
+            EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
    }
    TEST_F(TestBinDisc4Q, irisQuantile)
    {
@@ -352,13 +352,13 @@ namespace mdlp {
        auto Xtt = fit_transform(X[0], file.getY());
        EXPECT_EQ(expected, Xtt);
        auto Xt_t = torch::tensor(X[0], torch::kFloat32);
-        auto y_t = torch::tensor(file.getY(), torch::kInt64);
+        auto y_t = torch::tensor(file.getY(), torch::kInt32);
        auto Xtt_t = fit_transform_t(Xt_t, y_t);
        for (int i = 0; i < expected.size(); i++)
-            EXPECT_EQ(expected[i], Xtt_t[i].item<int64_t>());
+            EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
        fit_t(Xt_t, y_t);
        auto Xt_t2 = transform_t(Xt_t);
        for (int i = 0; i < expected.size(); i++)
-            EXPECT_EQ(expected[i], Xt_t2[i].item<int64_t>());
+            EXPECT_EQ(expected[i], Xt_t2[i].item<int>());
    }
 }
Author	SHA1	Message	Date
Ricardo Montañana Gómez	7b0673fd4b	Update README	2024-06-24 11:47:03 +02:00
Ricardo Montañana Gómez	a1346e1943	Fix Error in percentile method	2024-06-24 10:55:26 +02:00
Ricardo Montañana Gómez	b3fc598c29	Update build.yml	2024-06-14 22:04:29 +02:00
Ricardo Montañana Gómez	cc1efa0b4e	Update README	2024-06-14 22:01:11 +02:00
Ricardo Montañana Gómez	90965877eb	Add Makefile with build & test actions	2024-06-14 21:17:30 +02:00
Ricardo Montañana Gómez	c4e6c041fe	Fix int type	2024-06-09 00:29:55 +02:00
Ricardo Montañana Gómez	7938df7f0f	Update sonar mdlp version	2024-06-08 13:25:28 +02:00