mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-21 02:15:57 +00:00
Compare commits
7 Commits
v1.2.1
...
7b0673fd4b
Author | SHA1 | Date | |
---|---|---|---|
7b0673fd4b
|
|||
a1346e1943
|
|||
b3fc598c29
|
|||
cc1efa0b4e
|
|||
90965877eb
|
|||
c4e6c041fe
|
|||
7938df7f0f
|
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
|||||||
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
||||||
- name: Tests & build-wrapper
|
- name: Tests & build-wrapper
|
||||||
run: |
|
run: |
|
||||||
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch
|
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DENABLE_TESTING=ON
|
||||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
|
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
|
||||||
cd build
|
cd build
|
||||||
make
|
make
|
||||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -33,6 +33,8 @@
|
|||||||
**/build
|
**/build
|
||||||
build_Debug
|
build_Debug
|
||||||
build_Release
|
build_Release
|
||||||
|
build_debug
|
||||||
|
build_release
|
||||||
**/lcoverage
|
**/lcoverage
|
||||||
.idea
|
.idea
|
||||||
cmake-*
|
cmake-*
|
||||||
|
@@ -58,7 +58,7 @@ namespace mdlp {
|
|||||||
results.reserve(percentiles.size());
|
results.reserve(percentiles.size());
|
||||||
for (auto percentile : percentiles) {
|
for (auto percentile : percentiles) {
|
||||||
const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
|
const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
|
||||||
const auto indexLower = clip(i, 0, data.size() - 1);
|
const auto indexLower = clip(i, 0, data.size() - 2);
|
||||||
const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
|
const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
|
||||||
const double fraction =
|
const double fraction =
|
||||||
(percentile / 100.0 - percentI) /
|
(percentile / 100.0 - percentI) /
|
||||||
|
@@ -6,4 +6,6 @@ include_directories(${TORCH_INCLUDE_DIRS})
|
|||||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp)
|
add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp)
|
||||||
target_link_libraries(mdlp "${TORCH_LIBRARIES}")
|
target_link_libraries(mdlp "${TORCH_LIBRARIES}")
|
||||||
add_subdirectory(sample)
|
add_subdirectory(sample)
|
||||||
add_subdirectory(tests)
|
if (ENABLE_TESTING)
|
||||||
|
add_subdirectory(tests)
|
||||||
|
endif(ENABLE_TESTING)
|
||||||
|
@@ -20,7 +20,7 @@ namespace mdlp {
|
|||||||
{
|
{
|
||||||
auto num_elements = X_.numel();
|
auto num_elements = X_.numel();
|
||||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||||
labels_t y(y_.data_ptr<int64_t>(), y_.data_ptr<int64_t>() + num_elements);
|
labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
}
|
}
|
||||||
torch::Tensor Discretizer::transform_t(torch::Tensor& X_)
|
torch::Tensor Discretizer::transform_t(torch::Tensor& X_)
|
||||||
@@ -28,14 +28,14 @@ namespace mdlp {
|
|||||||
auto num_elements = X_.numel();
|
auto num_elements = X_.numel();
|
||||||
samples_t X(X_.data_ptr<float>(), X_.data_ptr<float>() + num_elements);
|
samples_t X(X_.data_ptr<float>(), X_.data_ptr<float>() + num_elements);
|
||||||
auto result = transform(X);
|
auto result = transform(X);
|
||||||
return torch::tensor(result, torch::kInt64);
|
return torch::tensor(result, torch::kInt32);
|
||||||
}
|
}
|
||||||
torch::Tensor Discretizer::fit_transform_t(torch::Tensor& X_, torch::Tensor& y_)
|
torch::Tensor Discretizer::fit_transform_t(torch::Tensor& X_, torch::Tensor& y_)
|
||||||
{
|
{
|
||||||
auto num_elements = X_.numel();
|
auto num_elements = X_.numel();
|
||||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||||
labels_t y(y_.data_ptr<int64_t>(), y_.data_ptr<int64_t>() + num_elements);
|
labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
|
||||||
auto result = fit_transform(X, y);
|
auto result = fit_transform(X, y);
|
||||||
return torch::tensor(result, torch::kInt64);
|
return torch::tensor(result, torch::kInt32);
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -18,7 +18,7 @@ namespace mdlp {
|
|||||||
void fit_t(torch::Tensor& X_, torch::Tensor& y_);
|
void fit_t(torch::Tensor& X_, torch::Tensor& y_);
|
||||||
torch::Tensor transform_t(torch::Tensor& X_);
|
torch::Tensor transform_t(torch::Tensor& X_);
|
||||||
torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
|
torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
|
||||||
static inline std::string version() { return "1.2.1"; };
|
static inline std::string version() { return "1.2.2"; };
|
||||||
protected:
|
protected:
|
||||||
labels_t discretizedData = labels_t();
|
labels_t discretizedData = labels_t();
|
||||||
cutPoints_t cutPoints;
|
cutPoints_t cutPoints;
|
||||||
|
13
Makefile
Normal file
13
Makefile
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
SHELL := /bin/bash
|
||||||
|
.DEFAULT_GOAL := build
|
||||||
|
.PHONY: build test
|
||||||
|
|
||||||
|
build:
|
||||||
|
@if [ -d build_release ]; then rm -fr build_release; fi
|
||||||
|
@mkdir build_release
|
||||||
|
@cmake -B build_release -S . -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF
|
||||||
|
@cmake --build build_release
|
||||||
|
|
||||||
|
test:
|
||||||
|
@echo "Testing..."
|
||||||
|
@cd tests && ./test
|
15
README.md
15
README.md
@@ -14,9 +14,17 @@ The implementation tries to mitigate the problem of different label values with
|
|||||||
Other features:
|
Other features:
|
||||||
|
|
||||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||||
- Intervals have to have more than two examples to be evaluated.
|
- Intervals must contain more than two examples to be evaluated (mdlp).
|
||||||
|
|
||||||
The algorithm returns the cut points for the variable.
|
- The algorithm returns the cut points for the variable.
|
||||||
|
|
||||||
|
- The transform method uses the cut points to return, for each value `x`, the index `i` such that:
|
||||||
|
|
||||||
|
cut[i - 1] <= x < cut[i]
|
||||||
|
|
||||||
|
using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) algorithm.
|
||||||
|
|
||||||
|
- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
|
||||||
|
|
||||||
## Sample
|
## Sample
|
||||||
|
|
||||||
@@ -34,6 +42,5 @@ build/sample/sample -h
|
|||||||
To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
|
To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd tests
|
make test
|
||||||
./test
|
|
||||||
```
|
```
|
||||||
|
@@ -139,12 +139,12 @@ void process_file(const string& path, const string& file_name, bool class_last,
|
|||||||
std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
|
std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
|
||||||
}
|
}
|
||||||
auto Xt = torch::tensor(X[0], torch::kFloat32);
|
auto Xt = torch::tensor(X[0], torch::kFloat32);
|
||||||
auto yt = torch::tensor(y, torch::kInt64);
|
auto yt = torch::tensor(y, torch::kInt32);
|
||||||
//test.fit_t(Xt, yt);
|
//test.fit_t(Xt, yt);
|
||||||
auto result = test.fit_transform_t(Xt, yt);
|
auto result = test.fit_transform_t(Xt, yt);
|
||||||
std::cout << "Transformed data (torch)...: " << std::endl;
|
std::cout << "Transformed data (torch)...: " << std::endl;
|
||||||
for (int i = 130; i < 135; i++) {
|
for (int i = 130; i < 135; i++) {
|
||||||
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << result[i].item<int64_t>() << std::endl;
|
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << result[i].item<int>() << std::endl;
|
||||||
}
|
}
|
||||||
auto disc = mdlp::BinDisc(3);
|
auto disc = mdlp::BinDisc(3);
|
||||||
auto res_v = disc.fit_transform(X[0], y);
|
auto res_v = disc.fit_transform(X[0], y);
|
||||||
@@ -152,7 +152,7 @@ void process_file(const string& path, const string& file_name, bool class_last,
|
|||||||
auto res_t = disc.transform_t(Xt);
|
auto res_t = disc.transform_t(Xt);
|
||||||
std::cout << "Transformed data (BinDisc)...: " << std::endl;
|
std::cout << "Transformed data (BinDisc)...: " << std::endl;
|
||||||
for (int i = 130; i < 135; i++) {
|
for (int i = 130; i < 135; i++) {
|
||||||
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << res_v[i] << " " << res_t[i].item<int64_t>() << std::endl;
|
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << res_v[i] << " " << res_t[i].item<int>() << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3,7 +3,7 @@ sonar.organization=rmontanana
|
|||||||
|
|
||||||
# This is the name and version displayed in the SonarCloud UI.
|
# This is the name and version displayed in the SonarCloud UI.
|
||||||
sonar.projectName=mdlp
|
sonar.projectName=mdlp
|
||||||
sonar.projectVersion=1.1.3
|
sonar.projectVersion=1.2.1
|
||||||
# sonar.test.exclusions=tests/**
|
# sonar.test.exclusions=tests/**
|
||||||
# sonar.tests=tests/
|
# sonar.tests=tests/
|
||||||
# sonar.coverage.exclusions=tests/**,sample/**
|
# sonar.coverage.exclusions=tests/**,sample/**
|
||||||
@@ -11,4 +11,4 @@ sonar.projectVersion=1.1.3
|
|||||||
#sonar.sources=.
|
#sonar.sources=.
|
||||||
|
|
||||||
# Encoding of the source code. Default is default system encoding
|
# Encoding of the source code. Default is default system encoding
|
||||||
sonar.sourceEncoding=UTF-8
|
sonar.sourceEncoding=UTF-8
|
||||||
|
@@ -335,10 +335,10 @@ namespace mdlp {
|
|||||||
auto Xtt = fit_transform(X[0], file.getY());
|
auto Xtt = fit_transform(X[0], file.getY());
|
||||||
EXPECT_EQ(expected, Xtt);
|
EXPECT_EQ(expected, Xtt);
|
||||||
auto Xt_t = torch::tensor(X[0], torch::kFloat32);
|
auto Xt_t = torch::tensor(X[0], torch::kFloat32);
|
||||||
auto y_t = torch::tensor(file.getY(), torch::kInt64);
|
auto y_t = torch::tensor(file.getY(), torch::kInt32);
|
||||||
auto Xtt_t = fit_transform_t(Xt_t, y_t);
|
auto Xtt_t = fit_transform_t(Xt_t, y_t);
|
||||||
for (int i = 0; i < expected.size(); i++)
|
for (int i = 0; i < expected.size(); i++)
|
||||||
EXPECT_EQ(expected[i], Xtt_t[i].item<int64_t>());
|
EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc4Q, irisQuantile)
|
TEST_F(TestBinDisc4Q, irisQuantile)
|
||||||
{
|
{
|
||||||
@@ -352,13 +352,13 @@ namespace mdlp {
|
|||||||
auto Xtt = fit_transform(X[0], file.getY());
|
auto Xtt = fit_transform(X[0], file.getY());
|
||||||
EXPECT_EQ(expected, Xtt);
|
EXPECT_EQ(expected, Xtt);
|
||||||
auto Xt_t = torch::tensor(X[0], torch::kFloat32);
|
auto Xt_t = torch::tensor(X[0], torch::kFloat32);
|
||||||
auto y_t = torch::tensor(file.getY(), torch::kInt64);
|
auto y_t = torch::tensor(file.getY(), torch::kInt32);
|
||||||
auto Xtt_t = fit_transform_t(Xt_t, y_t);
|
auto Xtt_t = fit_transform_t(Xt_t, y_t);
|
||||||
for (int i = 0; i < expected.size(); i++)
|
for (int i = 0; i < expected.size(); i++)
|
||||||
EXPECT_EQ(expected[i], Xtt_t[i].item<int64_t>());
|
EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
|
||||||
fit_t(Xt_t, y_t);
|
fit_t(Xt_t, y_t);
|
||||||
auto Xt_t2 = transform_t(Xt_t);
|
auto Xt_t2 = transform_t(Xt_t);
|
||||||
for (int i = 0; i < expected.size(); i++)
|
for (int i = 0; i < expected.size(); i++)
|
||||||
EXPECT_EQ(expected[i], Xt_t2[i].item<int64_t>());
|
EXPECT_EQ(expected[i], Xt_t2[i].item<int>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user