mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-17 16:35:57 +00:00
Compare commits
10 Commits
ab3786e2a2
...
v1.2.0
Author | SHA1 | Date | |
---|---|---|---|
7ff88c8e4b
|
|||
|
638bb2a59e | ||
|
f258fc220f | ||
0beeda320d
|
|||
6b68a41c42
|
|||
236d1b2f8b
|
|||
52ee93178f
|
|||
eeda4347e9
|
|||
5708dc3de9
|
|||
fbffc3a9c4
|
@@ -1,18 +1,16 @@
|
|||||||
FROM mcr.microsoft.com/devcontainers/cpp:0-ubuntu-22.04
|
FROM mcr.microsoft.com/devcontainers/cpp:0-ubuntu-22.04
|
||||||
|
|
||||||
ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.22.2"
|
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
||||||
|
&& apt-get -y install --no-install-recommends \
|
||||||
# Optionally install the cmake for vcpkg
|
python3 \
|
||||||
COPY ./reinstall-cmake.sh /tmp/
|
python3-pip \
|
||||||
|
lcov \
|
||||||
RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \
|
cmake \
|
||||||
chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \
|
&& apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
|
||||||
fi \
|
|
||||||
&& rm -f /tmp/reinstall-cmake.sh
|
|
||||||
|
|
||||||
|
RUN pip3 install --no-cache-dir \
|
||||||
|
cpplint \
|
||||||
|
cmake-format\
|
||||||
|
gcovr
|
||||||
# [Optional] Uncomment this section to install additional vcpkg ports.
|
# [Optional] Uncomment this section to install additional vcpkg ports.
|
||||||
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
|
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
|
||||||
|
|
||||||
# [Optional] Uncomment this section to install additional packages.
|
|
||||||
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
|
||||||
# && apt-get -y install --no-install-recommends <your-package-list-here>
|
|
||||||
|
7
.github/workflows/build.yml
vendored
7
.github/workflows/build.yml
vendored
@@ -13,11 +13,11 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3.2.0
|
- uses: actions/checkout@v4.1.6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||||
- name: Install sonar-scanner and build-wrapper
|
- name: Install sonar-scanner and build-wrapper
|
||||||
uses: SonarSource/sonarcloud-github-c-cpp@v1
|
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
||||||
- name: Install lcov & gcovr
|
- name: Install lcov & gcovr
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -y install lcov
|
sudo apt-get -y install lcov
|
||||||
@@ -30,8 +30,7 @@ jobs:
|
|||||||
make
|
make
|
||||||
ctest -C Release --output-on-failure --test-dir tests
|
ctest -C Release --output-on-failure --test-dir tests
|
||||||
cd ..
|
cd ..
|
||||||
# gcovr -f CPPFImdlp.cpp -f Metrics.cpp --merge-mode-functions=separate --txt --sonarqube=coverage.xml
|
gcovr -f CPPFImdlp.cpp -f Metrics.cpp -f BinDisc.cpp --txt --sonarqube=coverage.xml
|
||||||
gcovr -f CPPFImdlp.cpp -f Metrics.cpp --txt --sonarqube=coverage.xml
|
|
||||||
- name: Run sonar-scanner
|
- name: Run sonar-scanner
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -31,6 +31,8 @@
|
|||||||
*.out
|
*.out
|
||||||
*.app
|
*.app
|
||||||
**/build
|
**/build
|
||||||
|
build_Debug
|
||||||
|
build_Release
|
||||||
**/lcoverage
|
**/lcoverage
|
||||||
.idea
|
.idea
|
||||||
cmake-*
|
cmake-*
|
||||||
|
11
.vscode/launch.json
vendored
11
.vscode/launch.json
vendored
@@ -8,15 +8,10 @@
|
|||||||
"name": "C++ Launch config",
|
"name": "C++ Launch config",
|
||||||
"type": "cppdbg",
|
"type": "cppdbg",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${workspaceFolder}/build/sample/sample",
|
"program": "${workspaceFolder}/tests/build/BinDisc_unittest",
|
||||||
"cwd": "${workspaceFolder}/build/sample",
|
"cwd": "${workspaceFolder}/tests/build",
|
||||||
"args": [
|
"args": [],
|
||||||
"-f",
|
|
||||||
"glass"
|
|
||||||
],
|
|
||||||
"targetArchitecture": "arm64",
|
|
||||||
"launchCompleteCommand": "exec-run",
|
"launchCompleteCommand": "exec-run",
|
||||||
"preLaunchTask": "CMake: build",
|
|
||||||
"stopAtEntry": false,
|
"stopAtEntry": false,
|
||||||
"linux": {
|
"linux": {
|
||||||
"MIMode": "gdb",
|
"MIMode": "gdb",
|
||||||
|
86
.vscode/settings.json
vendored
86
.vscode/settings.json
vendored
@@ -5,5 +5,89 @@
|
|||||||
},
|
},
|
||||||
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
||||||
"cmake.configureOnOpen": true,
|
"cmake.configureOnOpen": true,
|
||||||
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
|
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json",
|
||||||
|
"files.associations": {
|
||||||
|
"*.rmd": "markdown",
|
||||||
|
"*.py": "python",
|
||||||
|
"vector": "cpp",
|
||||||
|
"__bit_reference": "cpp",
|
||||||
|
"__bits": "cpp",
|
||||||
|
"__config": "cpp",
|
||||||
|
"__debug": "cpp",
|
||||||
|
"__errc": "cpp",
|
||||||
|
"__hash_table": "cpp",
|
||||||
|
"__locale": "cpp",
|
||||||
|
"__mutex_base": "cpp",
|
||||||
|
"__node_handle": "cpp",
|
||||||
|
"__nullptr": "cpp",
|
||||||
|
"__split_buffer": "cpp",
|
||||||
|
"__string": "cpp",
|
||||||
|
"__threading_support": "cpp",
|
||||||
|
"__tuple": "cpp",
|
||||||
|
"array": "cpp",
|
||||||
|
"atomic": "cpp",
|
||||||
|
"bitset": "cpp",
|
||||||
|
"cctype": "cpp",
|
||||||
|
"chrono": "cpp",
|
||||||
|
"clocale": "cpp",
|
||||||
|
"cmath": "cpp",
|
||||||
|
"compare": "cpp",
|
||||||
|
"complex": "cpp",
|
||||||
|
"concepts": "cpp",
|
||||||
|
"cstdarg": "cpp",
|
||||||
|
"cstddef": "cpp",
|
||||||
|
"cstdint": "cpp",
|
||||||
|
"cstdio": "cpp",
|
||||||
|
"cstdlib": "cpp",
|
||||||
|
"cstring": "cpp",
|
||||||
|
"ctime": "cpp",
|
||||||
|
"cwchar": "cpp",
|
||||||
|
"cwctype": "cpp",
|
||||||
|
"exception": "cpp",
|
||||||
|
"initializer_list": "cpp",
|
||||||
|
"ios": "cpp",
|
||||||
|
"iosfwd": "cpp",
|
||||||
|
"istream": "cpp",
|
||||||
|
"limits": "cpp",
|
||||||
|
"locale": "cpp",
|
||||||
|
"memory": "cpp",
|
||||||
|
"mutex": "cpp",
|
||||||
|
"new": "cpp",
|
||||||
|
"optional": "cpp",
|
||||||
|
"ostream": "cpp",
|
||||||
|
"ratio": "cpp",
|
||||||
|
"sstream": "cpp",
|
||||||
|
"stdexcept": "cpp",
|
||||||
|
"streambuf": "cpp",
|
||||||
|
"string": "cpp",
|
||||||
|
"string_view": "cpp",
|
||||||
|
"system_error": "cpp",
|
||||||
|
"tuple": "cpp",
|
||||||
|
"type_traits": "cpp",
|
||||||
|
"typeinfo": "cpp",
|
||||||
|
"unordered_map": "cpp",
|
||||||
|
"variant": "cpp",
|
||||||
|
"algorithm": "cpp",
|
||||||
|
"iostream": "cpp",
|
||||||
|
"iomanip": "cpp",
|
||||||
|
"numeric": "cpp",
|
||||||
|
"set": "cpp",
|
||||||
|
"__tree": "cpp",
|
||||||
|
"deque": "cpp",
|
||||||
|
"list": "cpp",
|
||||||
|
"map": "cpp",
|
||||||
|
"unordered_set": "cpp",
|
||||||
|
"any": "cpp",
|
||||||
|
"condition_variable": "cpp",
|
||||||
|
"forward_list": "cpp",
|
||||||
|
"fstream": "cpp",
|
||||||
|
"stack": "cpp",
|
||||||
|
"thread": "cpp",
|
||||||
|
"__memory": "cpp",
|
||||||
|
"filesystem": "cpp",
|
||||||
|
"*.toml": "toml",
|
||||||
|
"utility": "cpp",
|
||||||
|
"span": "cpp",
|
||||||
|
"*.tcc": "cpp"
|
||||||
|
}
|
||||||
}
|
}
|
99
BinDisc.cpp
Normal file
99
BinDisc.cpp
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include <algorithm>
|
||||||
|
#include <limits>
|
||||||
|
#include <cmath>
|
||||||
|
#include "BinDisc.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
|
||||||
|
BinDisc::BinDisc(int n_bins, strategy_t strategy) :
|
||||||
|
Discretizer(), n_bins{ n_bins }, strategy{ strategy }
|
||||||
|
{
|
||||||
|
if (n_bins < 3) {
|
||||||
|
throw std::invalid_argument("n_bins must be greater than 2");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BinDisc::~BinDisc() = default;
|
||||||
|
// Computes the cut points for X with the configured strategy, discarding the
// cut points of any previous fit.
// An empty X produces a single cut point at the largest representable value.
void BinDisc::fit(samples_t& X)
{
    cutPoints.clear();
    if (X.empty()) {
        cutPoints.push_back(std::numeric_limits<precision_t>::max());
        return;
    }
    switch (strategy) {
        case strategy_t::QUANTILE:
            fit_quantile(X);
            break;
        case strategy_t::UNIFORM:
            fit_uniform(X);
            break;
    }
}
|
||||||
|
// Overload that satisfies the Discretizer interface. The labels `y` are not
// read: the work is delegated to the label-free fit.
void BinDisc::fit(samples_t& X, labels_t& y)
{
    this->fit(X);
}
|
||||||
|
// Returns `num - 1` evenly spaced values that start at `start` and advance by
// (end - start) / (num - 1). The end point itself is left out because it is
// not needed by the callers.
// A degenerate range (start == end) yields the single placeholder value 0.
// Fewer than two points (num < 2) yields an empty vector.
std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
{
    // Doesn't include end point as it is not needed
    if (start == end) {
        return { 0 };
    }
    // With num < 2 there is no interval to subdivide. Returning early avoids a
    // division by zero (num == 1) and a loop bound that would wrap around to a
    // huge unsigned value (num <= 0).
    if (num < 2) {
        return {};
    }
    const int steps = num - 1;
    const precision_t delta = (end - start) / static_cast<precision_t>(steps);
    std::vector<precision_t> linspc;
    linspc.reserve(static_cast<size_t>(steps));
    for (int i = 0; i < steps; ++i) {
        linspc.push_back(start + delta * static_cast<precision_t>(i));
    }
    return linspc;
}
|
||||||
|
// Limits n to at most `upper` and then raises the result to at least `lower`.
// When lower > upper the lower bound wins.
size_t clip(const size_t n, size_t lower, size_t upper)
{
    const size_t capped = (upper < n) ? upper : n;
    return (capped < lower) ? lower : capped;
}
|
||||||
|
// Computes linearly interpolated percentiles of `data`.
//   data        - samples sorted in ascending order (fit_quantile sorts them
//                 before calling).
//   percentiles - requested percentiles, expressed in the range [0, 100].
// Returns one value per requested percentile, except that a value equal to the
// previously stored one is skipped so consecutive results are never repeated.
// Empty `data` yields an empty result; a single sample is returned as is.
std::vector<precision_t> percentile(samples_t& data, std::vector<precision_t>& percentiles)
{
    // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
    std::vector<precision_t> results;
    if (data.empty() || percentiles.empty()) {
        return results;
    }
    if (data.size() == 1) {
        // Interpolating needs two samples: with one, size() - 1 is zero and
        // the formula below would divide by zero and read past the end.
        results.push_back(data.front());
        return results;
    }
    results.reserve(percentiles.size());
    const size_t lastIndex = data.size() - 1;
    const double span = static_cast<double>(lastIndex);
    for (const auto pct : percentiles) {
        const size_t i = static_cast<size_t>(std::floor(span * pct / 100.));
        // Capped at lastIndex - 1 so that data[indexLower + 1] is always a
        // valid element, even for the 100th percentile.
        const auto indexLower = clip(i, 0, lastIndex - 1);
        const double percentI = static_cast<double>(indexLower) / span;
        const double fraction =
            (pct / 100.0 - percentI) /
            (static_cast<double>(indexLower + 1) / span - percentI);
        // Rounded to the stored precision before comparing, so the duplicate
        // check sees exactly the value that would be stored.
        const auto value = static_cast<precision_t>(
            data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction);
        // back() is only defined for a non-empty vector, so the first value is
        // always stored.
        if (results.empty() || value != results.back()) {
            results.push_back(value);
        }
    }
    return results;
}
|
||||||
|
// Quantile strategy: cut points are placed at evenly spaced percentiles of a
// sorted copy of X, so X itself is left untouched.
// A constant feature gets a single cut point at the largest representable
// value.
void BinDisc::fit_quantile(samples_t& X)
{
    samples_t sorted_values(X);
    std::sort(sorted_values.begin(), sorted_values.end());
    // if X is constant
    const bool is_constant =
        sorted_values.front() == sorted_values.back() || sorted_values.size() == 1;
    if (is_constant) {
        cutPoints.push_back(std::numeric_limits<precision_t>::max());
        return;
    }
    // n_bins + 1 points requested; linspace leaves out the end point (100).
    auto quantiles = linspace(0.0, 100.0, n_bins + 1);
    cutPoints = percentile(sorted_values, quantiles);
    normalizeCutPoints();
}
|
||||||
|
// Uniform strategy: cut points are evenly spaced between the smallest and the
// largest value of X.
void BinDisc::fit_uniform(samples_t& X)
{
    const auto extremes = std::minmax_element(X.begin(), X.end());
    const precision_t lowest = *extremes.first;
    const precision_t highest = *extremes.second;
    cutPoints = linspace(lowest, highest, n_bins + 1);
    normalizeCutPoints();
}
|
||||||
|
void BinDisc::normalizeCutPoints()
|
||||||
|
{
|
||||||
|
// Add max value to the end
|
||||||
|
cutPoints.push_back(std::numeric_limits<precision_t>::max());
|
||||||
|
// Remove first as it is not needed
|
||||||
|
cutPoints.erase(cutPoints.begin());
|
||||||
|
}
|
||||||
|
}
|
29
BinDisc.h
Normal file
29
BinDisc.h
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#ifndef BINDISC_H
|
||||||
|
#define BINDISC_H
|
||||||
|
|
||||||
|
#include "typesFImdlp.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
|
||||||
|
// Binning strategies understood by BinDisc.
enum class strategy_t {
    UNIFORM,   // cut points evenly spaced between the minimum and the maximum
    QUANTILE,  // cut points placed at evenly spaced percentiles of the data
};
|
||||||
|
class BinDisc : public Discretizer {
|
||||||
|
public:
|
||||||
|
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
||||||
|
~BinDisc();
|
||||||
|
// y is included for compatibility with the Discretizer interface
|
||||||
|
void fit(samples_t& X_, labels_t& y) override;
|
||||||
|
void fit(samples_t& X);
|
||||||
|
private:
|
||||||
|
void fit_uniform(samples_t&);
|
||||||
|
void fit_quantile(samples_t&);
|
||||||
|
void normalizeCutPoints();
|
||||||
|
int n_bins;
|
||||||
|
strategy_t strategy;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -7,7 +7,7 @@ endif ()
|
|||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
|
|
||||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp sample/sample.cpp)
|
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||||
add_subdirectory(sample)
|
add_subdirectory(sample)
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
|
|
||||||
|
@@ -3,20 +3,17 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include "CPPFImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
#include "Metrics.h"
|
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
|
||||||
|
Discretizer(),
|
||||||
|
min_length(min_length_),
|
||||||
max_depth(max_depth_),
|
max_depth(max_depth_),
|
||||||
proposed_cuts(proposed)
|
proposed_cuts(proposed)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp() = default;
|
|
||||||
|
|
||||||
CPPFImdlp::~CPPFImdlp() = default;
|
|
||||||
|
|
||||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||||
{
|
{
|
||||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||||
@@ -178,7 +175,7 @@ namespace mdlp {
|
|||||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||||
{
|
{
|
||||||
indices_t idx(X_.size());
|
indices_t idx(X_.size());
|
||||||
iota(idx.begin(), idx.end(), 0);
|
std::iota(idx.begin(), idx.end(), 0);
|
||||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||||
if (X_[i1] == X_[i2])
|
if (X_[i1] == X_[i2])
|
||||||
return y_[i1] < y_[i2];
|
return y_[i1] < y_[i2];
|
||||||
@@ -209,13 +206,5 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||||
}
|
}
|
||||||
labels_t& CPPFImdlp::transform(const samples_t& data)
|
|
||||||
{
|
|
||||||
discretizedData.reserve(data.size());
|
|
||||||
for (const precision_t& item : data) {
|
|
||||||
auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
|
||||||
discretizedData.push_back(upper - cutPoints.begin());
|
|
||||||
}
|
|
||||||
return discretizedData;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
25
CPPFImdlp.h
25
CPPFImdlp.h
@@ -2,13 +2,20 @@
|
|||||||
#define CPPFIMDLP_H
|
#define CPPFIMDLP_H
|
||||||
|
|
||||||
#include "typesFImdlp.h"
|
#include "typesFImdlp.h"
|
||||||
#include "Metrics.h"
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include "Metrics.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class CPPFImdlp {
|
class CPPFImdlp : public Discretizer {
|
||||||
|
public:
|
||||||
|
CPPFImdlp() = default;
|
||||||
|
CPPFImdlp(size_t min_length_, int max_depth_, float proposed);
|
||||||
|
virtual ~CPPFImdlp() = default;
|
||||||
|
void fit(samples_t& X_, labels_t& y_) override;
|
||||||
|
inline int get_depth() const { return depth; };
|
||||||
protected:
|
protected:
|
||||||
size_t min_length = 3;
|
size_t min_length = 3;
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
@@ -18,28 +25,14 @@ namespace mdlp {
|
|||||||
samples_t X = samples_t();
|
samples_t X = samples_t();
|
||||||
labels_t y = labels_t();
|
labels_t y = labels_t();
|
||||||
Metrics metrics = Metrics(y, indices);
|
Metrics metrics = Metrics(y, indices);
|
||||||
cutPoints_t cutPoints;
|
|
||||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||||
labels_t discretizedData = labels_t();
|
|
||||||
|
|
||||||
static indices_t sortIndices(samples_t&, labels_t&);
|
static indices_t sortIndices(samples_t&, labels_t&);
|
||||||
|
|
||||||
void computeCutPoints(size_t, size_t, int);
|
void computeCutPoints(size_t, size_t, int);
|
||||||
void resizeCutPoints();
|
void resizeCutPoints();
|
||||||
bool mdlp(size_t, size_t, size_t);
|
bool mdlp(size_t, size_t, size_t);
|
||||||
size_t getCandidate(size_t, size_t);
|
size_t getCandidate(size_t, size_t);
|
||||||
size_t compute_max_num_cut_points() const;
|
size_t compute_max_num_cut_points() const;
|
||||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||||
|
|
||||||
public:
|
|
||||||
CPPFImdlp();
|
|
||||||
CPPFImdlp(size_t, int, float);
|
|
||||||
~CPPFImdlp();
|
|
||||||
void fit(samples_t&, labels_t&);
|
|
||||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
|
||||||
labels_t& transform(const samples_t&);
|
|
||||||
inline int get_depth() const { return depth; };
|
|
||||||
static inline string version() { return "1.1.2"; };
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
31
Discretizer.h
Normal file
31
Discretizer.h
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#ifndef DISCRETIZER_H
|
||||||
|
#define DISCRETIZER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "typesFImdlp.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
class Discretizer {
|
||||||
|
public:
|
||||||
|
Discretizer() = default;
|
||||||
|
virtual ~Discretizer() = default;
|
||||||
|
virtual void fit(samples_t& X_, labels_t& y_) = 0;
|
||||||
|
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||||
|
labels_t& transform(const samples_t& data)
|
||||||
|
{
|
||||||
|
discretizedData.clear();
|
||||||
|
discretizedData.reserve(data.size());
|
||||||
|
for (const precision_t& item : data) {
|
||||||
|
auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
||||||
|
discretizedData.push_back(upper - cutPoints.begin());
|
||||||
|
}
|
||||||
|
return discretizedData;
|
||||||
|
};
|
||||||
|
static inline std::string version() { return "1.2.0"; };
|
||||||
|
protected:
|
||||||
|
labels_t discretizedData = labels_t();
|
||||||
|
cutPoints_t cutPoints;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -2,7 +2,7 @@
|
|||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||||
|
|
||||||
# mdlp
|
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
||||||
|
|
||||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||||
|
|
||||||
|
@@ -3,7 +3,7 @@ sonar.organization=rmontanana
|
|||||||
|
|
||||||
# This is the name and version displayed in the SonarCloud UI.
|
# This is the name and version displayed in the SonarCloud UI.
|
||||||
sonar.projectName=mdlp
|
sonar.projectName=mdlp
|
||||||
sonar.projectVersion=1.0.2
|
sonar.projectVersion=1.1.3
|
||||||
# sonar.test.exclusions=tests/**
|
# sonar.test.exclusions=tests/**
|
||||||
# sonar.tests=tests/
|
# sonar.tests=tests/
|
||||||
# sonar.coverage.exclusions=tests/**,sample/**
|
# sonar.coverage.exclusions=tests/**,sample/**
|
||||||
|
346
tests/BinDisc_unittest.cpp
Normal file
346
tests/BinDisc_unittest.cpp
Normal file
@@ -0,0 +1,346 @@
|
|||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "ArffFiles.h"
|
||||||
|
#include "../BinDisc.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
const float margin = 1e-4;
|
||||||
|
// Picks the directory that holds the test datasets: "../datasets/" when
// iris.arff can be opened there, "../../tests/datasets/" otherwise.
static std::string set_data_path()
{
    const std::string local_path = "../datasets/";
    // The stream is only a probe; it is closed when it goes out of scope.
    const std::ifstream probe(local_path + "iris.arff");
    return probe.is_open() ? local_path : std::string("../../tests/datasets/");
}
const std::string data_path = set_data_path();
|
||||||
|
// Fixture that is itself a BinDisc with the UNIFORM strategy (3 bins by
// default), so tests can call fit/transform directly.
class TestBinDisc3U : public BinDisc, public testing::Test {
public:
    TestBinDisc3U(int n_bins = 3) : BinDisc(n_bins, strategy_t::UNIFORM) {}
};
|
||||||
|
// Fixture that is itself a BinDisc with the QUANTILE strategy (3 bins by
// default), so tests can call fit/transform directly.
class TestBinDisc3Q : public BinDisc, public testing::Test {
public:
    TestBinDisc3Q(int n_bins = 3) : BinDisc(n_bins, strategy_t::QUANTILE) {}
};
|
||||||
|
// Fixture that is itself a BinDisc with the UNIFORM strategy (4 bins by
// default), so tests can call fit/transform directly.
class TestBinDisc4U : public BinDisc, public testing::Test {
public:
    TestBinDisc4U(int n_bins = 4) : BinDisc(n_bins, strategy_t::UNIFORM) {}
};
|
||||||
|
// Fixture that is itself a BinDisc with the QUANTILE strategy (4 bins by
// default), so tests can call fit/transform directly.
class TestBinDisc4Q : public BinDisc, public testing::Test {
public:
    TestBinDisc4Q(int n_bins = 4) : BinDisc(n_bins, strategy_t::QUANTILE) {}
};
|
||||||
|
// Nine consecutive values, three uniform bins, fitted through the
// two-argument (interface) overload of fit.
TEST_F(TestBinDisc3U, Easy3BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    labels_t unused_labels;
    fit(samples, unused_labels);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_NEAR(3.66667, cuts.at(0), margin);
    EXPECT_NEAR(6.33333, cuts.at(1), margin);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts.at(2));
    const labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Nine consecutive values, three quantile bins.
TEST_F(TestBinDisc3Q, Easy3BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    fit(samples);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_NEAR(3.666667, cuts[0], margin);
    EXPECT_NEAR(6.333333, cuts[1], margin);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[2]);
    const labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Ten consecutive values, three uniform bins.
TEST_F(TestBinDisc3U, X10BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
    fit(samples);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_EQ(4.0, cuts[0]);
    EXPECT_EQ(7.0, cuts[1]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[2]);
    const labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Ten consecutive values, three quantile bins.
TEST_F(TestBinDisc3Q, X10BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
    fit(samples);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_EQ(4, cuts[0]);
    EXPECT_EQ(7, cuts[1]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[2]);
    const labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Eleven consecutive values, three uniform bins.
TEST_F(TestBinDisc3U, X11BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
    fit(samples);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_NEAR(4.33333, cuts[0], margin);
    EXPECT_NEAR(7.66667, cuts[1], margin);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[2]);
    const labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Eleven consecutive values, three quantile bins.
// The test runs on the QUANTILE fixture: declared on TestBinDisc3U it would
// only repeat the uniform test and never exercise the quantile strategy.
TEST_F(TestBinDisc3Q, X11BinsQuantile)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
    fit(X);
    auto cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_NEAR(4.33333, cuts[0], margin);
    EXPECT_NEAR(7.66667, cuts[1], margin);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[2]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// A constant feature collapses into a single bin with the uniform strategy.
TEST_F(TestBinDisc3U, ConstantUniform)
{
    samples_t samples = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    fit(samples);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(1, cuts.size());
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[0]);
    const labels_t expected = { 0, 0, 0, 0, 0, 0 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// A constant feature collapses into a single bin with the quantile strategy.
TEST_F(TestBinDisc3Q, ConstantQuantile)
{
    samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next check indexes into cuts.
    ASSERT_EQ(1, cuts.size());
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[0]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 0, 0, 0 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fitting an empty sample set leaves only the upper sentinel (uniform).
TEST_F(TestBinDisc3U, EmptyUniform)
{
    samples_t X = {};
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next check indexes into cuts.
    ASSERT_EQ(1, cuts.size());
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[0]);
}
|
||||||
|
// Fitting an empty sample set leaves only the upper sentinel (quantile).
TEST_F(TestBinDisc3Q, EmptyQuantile)
{
    samples_t X = {};
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next check indexes into cuts.
    ASSERT_EQ(1, cuts.size());
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[0]);
}
|
||||||
|
// Constructing a BinDisc with fewer than three bins must be rejected.
TEST(TestBinDisc3, ExceptionNumberBins)
{
    EXPECT_THROW(BinDisc rejected(2), std::invalid_argument);
}
|
||||||
|
// Unsorted input with only two distinct values, three uniform bins.
TEST_F(TestBinDisc3U, EasyRepeated)
{
    samples_t samples = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
    fit(samples);
    const cutPoints_t cuts = getCutPoints();
    ASSERT_EQ(3, cuts.size());
    EXPECT_NEAR(1.66667, cuts[0], margin);
    EXPECT_NEAR(2.33333, cuts[1], margin);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[2]);
    const labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
    const labels_t labels = transform(samples);
    EXPECT_EQ(expected, labels);
    ASSERT_EQ(3.0, samples[0]); // the input is not modified
}
|
||||||
|
// Unsorted input with only two distinct values, three quantile bins: the
// repeated percentile is stored once, so only two cut points remain.
TEST_F(TestBinDisc3Q, EasyRepeated)
{
    samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(2, cuts.size());
    EXPECT_NEAR(1.66667, cuts[0], margin);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[1]);
    auto labels = transform(X);
    labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
    EXPECT_EQ(expected, labels);
    ASSERT_EQ(3.0, X[0]); // X is not modified
}
|
||||||
|
// Twelve consecutive values, four uniform bins.
TEST_F(TestBinDisc4U, Easy4BinsUniform)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(3.75, cuts[0]);
    EXPECT_EQ(6.5, cuts[1]);
    EXPECT_EQ(9.25, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Twelve consecutive values, four quantile bins.
TEST_F(TestBinDisc4Q, Easy4BinsQuantile)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(3.75, cuts[0]);
    EXPECT_EQ(6.5, cuts[1]);
    EXPECT_EQ(9.25, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Thirteen consecutive values, four uniform bins.
TEST_F(TestBinDisc4U, X13BinsUniform)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(4.0, cuts[0]);
    EXPECT_EQ(7.0, cuts[1]);
    EXPECT_EQ(10.0, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Thirteen consecutive values, four quantile bins.
TEST_F(TestBinDisc4Q, X13BinsQuantile)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(4.0, cuts[0]);
    EXPECT_EQ(7.0, cuts[1]);
    EXPECT_EQ(10.0, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fourteen consecutive values, four uniform bins.
TEST_F(TestBinDisc4U, X14BinsUniform)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(4.25, cuts[0]);
    EXPECT_EQ(7.5, cuts[1]);
    EXPECT_EQ(10.75, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fourteen consecutive values, four quantile bins.
TEST_F(TestBinDisc4Q, X14BinsQuantile)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(4.25, cuts[0]);
    EXPECT_EQ(7.5, cuts[1]);
    EXPECT_EQ(10.75, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fifteen distinct values in arbitrary order, four uniform bins.
TEST_F(TestBinDisc4U, X15BinsUniform)
{
    samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Asserted rather than expected: the next checks index into cuts.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(4.5, cuts[0]);
    EXPECT_EQ(8, cuts[1]);
    EXPECT_EQ(11.5, cuts[2]);
    EXPECT_EQ(std::numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fits the TestBinDisc4Q fixture on the values 1..15 given in shuffled order
// and checks the four cut points; the expected labels follow the order of the
// input samples, not their sorted order.
TEST_F(TestBinDisc4Q, X15BinsQuantile)
{
    samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
    fit(X);
    auto cuts = getCutPoints();
    // Fatal on mismatch: cuts[0]..cuts[3] are indexed right below, which would
    // read out of bounds if fewer than four cut points were produced.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(4.5, cuts[0]);
    EXPECT_EQ(8, cuts[1]);
    EXPECT_EQ(11.5, cuts[2]);
    // The last cut point is expected to be the largest finite float.
    EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fits the TestBinDisc4U fixture on ten samples containing repeated values and
// checks the four cut points plus the bin label assigned to every sample.
TEST_F(TestBinDisc4U, RepeatedValuesUniform)
{
    samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
    // sample index:  0    1    2    3    4    5    6    7    8    9
    fit(X);
    auto cuts = getCutPoints();
    // Fatal on mismatch: cuts[0]..cuts[3] are indexed right below, which would
    // read out of bounds if fewer than four cut points were produced.
    ASSERT_EQ(4, cuts.size());
    EXPECT_EQ(1.0, cuts[0]);
    EXPECT_EQ(2.0, cuts[1]);
    ASSERT_EQ(3.0, cuts[2]);
    // The last cut point is expected to be the largest finite float.
    EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
    auto labels = transform(X);
    labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected, labels);
}
|
||||||
|
// Fits the TestBinDisc4Q fixture on ten samples containing repeated values.
// Only three cut points are expected here, so the size check is fatal before
// the individual cut points are read.
TEST_F(TestBinDisc4Q, RepeatedValuesQuantile)
{
    samples_t samples = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
    // sample index:        0    1    2    3    4    5    6    7    8    9
    labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 };

    fit(samples);
    auto cutPoints = getCutPoints();
    ASSERT_EQ(3, cutPoints.size());
    EXPECT_EQ(2.0, cutPoints[0]);
    ASSERT_EQ(3.0, cutPoints[1]);
    // The last cut point is expected to be the largest finite float.
    EXPECT_EQ(numeric_limits<float>::max(), cutPoints[2]);

    auto computed = transform(samples);
    EXPECT_EQ(expected, computed);
}
|
||||||
|
// Loads iris.arff, fits the TestBinDisc4U fixture on feature column 0 and
// compares the discretized column with the stored reference labels.
TEST_F(TestBinDisc4U, irisUniform)
{
    labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };

    ArffFiles arff;
    arff.load(data_path + "iris.arff", true);
    auto& features = arff.getX();

    fit(features[0]);
    auto discretized = transform(features[0]);
    EXPECT_EQ(expected, discretized);
}
|
||||||
|
// Loads iris.arff, fits the TestBinDisc4Q fixture on feature column 0 and
// compares the discretized column with the stored reference labels.
TEST_F(TestBinDisc4Q, irisQuantile)
{
    labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };

    ArffFiles arff;
    arff.load(data_path + "iris.arff", true);
    auto& features = arff.getX();

    fit(features[0]);
    auto discretized = transform(features[0]);
    EXPECT_EQ(expected, discretized);
}
|
||||||
|
}
|
@@ -1,3 +1,4 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.20)
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
|
|
||||||
@@ -15,15 +16,28 @@ FetchContent_MakeAvailable(googletest)
|
|||||||
enable_testing()
|
enable_testing()
|
||||||
|
|
||||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
||||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
|
||||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
|
||||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
|
||||||
target_link_options(Metrics_unittest PRIVATE --coverage)
|
target_link_options(Metrics_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
||||||
|
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
||||||
|
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||||
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp)
|
||||||
|
target_link_libraries(BinDisc_unittest GTest::gtest_main)
|
||||||
|
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
||||||
|
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(Discretizer_unittest ../BinDisc.cpp ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp Discretizer_unittest.cpp)
|
||||||
|
target_link_libraries(Discretizer_unittest GTest::gtest_main)
|
||||||
|
target_compile_options(Discretizer_unittest PRIVATE --coverage)
|
||||||
|
target_link_options(Discretizer_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
include(GoogleTest)
|
include(GoogleTest)
|
||||||
|
|
||||||
gtest_discover_tests(Metrics_unittest)
|
gtest_discover_tests(Metrics_unittest)
|
||||||
gtest_discover_tests(FImdlp_unittest)
|
gtest_discover_tests(FImdlp_unittest)
|
||||||
|
gtest_discover_tests(BinDisc_unittest)
|
||||||
|
gtest_discover_tests(Discretizer_unittest)
|
74
tests/Discretizer_unittest.cpp
Normal file
74
tests/Discretizer_unittest.cpp
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "ArffFiles.h"
|
||||||
|
#include "../Discretizer.h"
|
||||||
|
#include "../BinDisc.h"
|
||||||
|
#include "../CPPFImdlp.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
const float margin = 1e-4;
|
||||||
|
// Picks the directory the test datasets are read from: "../datasets/" when
// iris.arff can be opened there, otherwise "../../tests/datasets/".
static std::string set_data_path()
{
    const std::string local_dir = "../datasets/";
    // The probe stream is a temporary, so it is closed as soon as the check ends.
    const bool found = std::ifstream(local_dir + "iris.arff").is_open();
    return found ? local_dir : std::string("../../tests/datasets/");
}
|
||||||
|
const std::string data_path = set_data_path();
|
||||||
|
|
||||||
|
// Discretizes column 0 of iris.arff with a 4-bin uniform BinDisc, driven
// through the Discretizer base interface, and compares every label with the
// stored reference values.
TEST(Discretizer, BinIrisUniform)
{
    ArffFiles file;
    // Scoped object instead of new/delete: it is released on every exit path,
    // including an exception thrown by load(), fit() or transform().
    BinDisc binDisc(4, strategy_t::UNIFORM);
    // All calls below go through the base class, as the test intends.
    Discretizer& disc = binDisc;
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    // Empty label vector passed to the two-argument fit().
    auto y = labels_t();
    disc.fit(X[0], y);
    auto Xt = disc.transform(X[0]);
    labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
    EXPECT_EQ(expected, Xt);
}
|
||||||
|
// Discretizes column 0 of iris.arff with a 4-bin quantile BinDisc, driven
// through the Discretizer base interface, and compares every label with the
// stored reference values.
TEST(Discretizer, BinIrisQuantile)
{
    ArffFiles file;
    // Scoped object instead of new/delete: it is released on every exit path,
    // including an exception thrown by load(), fit() or transform().
    BinDisc binDisc(4, strategy_t::QUANTILE);
    // All calls below go through the base class, as the test intends.
    Discretizer& disc = binDisc;
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    // Empty label vector passed to the two-argument fit().
    auto y = labels_t();
    disc.fit(X[0], y);
    auto Xt = disc.transform(X[0]);
    labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
    EXPECT_EQ(expected, Xt);
}
|
||||||
|
// Discretizes column 1 of iris.arff with a default-constructed CPPFImdlp,
// driven through the Discretizer base interface and fitted against the class
// labels of the file, then compares every label with the stored reference.
TEST(Discretizer, FImdlpIris)
{
    labels_t expected = {
        5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
        5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
        3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
        3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
        4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
        1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
    };
    ArffFiles file;
    // Scoped object instead of new/delete: it is released on every exit path,
    // including an exception thrown by load(), fit() or transform().
    CPPFImdlp fimdlp;
    // All calls below go through the base class, as the test intends.
    Discretizer& disc = fimdlp;
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    labels_t& y = file.getY();
    disc.fit(X[1], y);
    auto computed = disc.transform(X[1]);
    // Fatal on mismatch: the loop below indexes expected[i] for every element
    // of computed, which would read out of bounds if computed were longer.
    ASSERT_EQ(computed.size(), expected.size());
    // Element-wise comparison so each differing position is reported on its own.
    for (size_t i = 0; i < computed.size(); i++) {
        EXPECT_EQ(computed[i], expected[i]);
    }
}
|
||||||
|
}
|
10
tests/test
10
tests/test
@@ -1,20 +1,18 @@
|
|||||||
|
#!/bin/bash
|
||||||
if [ -d build ] ; then
|
if [ -d build ] ; then
|
||||||
rm -fr build
|
rm -fr build
|
||||||
fi
|
fi
|
||||||
if [ -d gcovr-report ] ; then
|
if [ -d gcovr-report ] ; then
|
||||||
rm -fr gcovr-report
|
rm -fr gcovr-report
|
||||||
fi
|
fi
|
||||||
cmake -S . -B build -Wno-dev
|
cmake -S . -B build -Wno-dev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="--coverage" -DCMAKE_C_FLAGS="--coverage"
|
||||||
cmake --build build
|
cmake --build build
|
||||||
cd build
|
cd build
|
||||||
ctest --output-on-failure
|
ctest --output-on-failure
|
||||||
cd ..
|
cd ..
|
||||||
if [ ! -d gcovr-report ] ; then
|
mkdir gcovr-report
|
||||||
mkdir gcovr-report
|
|
||||||
fi
|
|
||||||
rm -fr gcovr-report/* 2>/dev/null
|
|
||||||
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||||
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
|
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
|
||||||
#lcov --list lcoverage/main_coverage.info
|
#lcov --list lcoverage/main_coverage.info
|
||||||
cd ..
|
cd ..
|
||||||
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml
|
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --gcov-filter "Discretizer.h" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
|
||||||
|
412
tests/testKbins.py
Normal file
412
tests/testKbins.py
Normal file
@@ -0,0 +1,412 @@
|
|||||||
|
from scipy.io.arff import loadarff
|
||||||
|
from sklearn.preprocessing import KBinsDiscretizer
|
||||||
|
|
||||||
|
|
||||||
|
def test(clf, X, expected, title):
|
||||||
|
X = [[x] for x in X]
|
||||||
|
clf.fit(X)
|
||||||
|
computed = [int(x[0]) for x in clf.transform(X)]
|
||||||
|
print(f"{title}")
|
||||||
|
print(f"{computed=}")
|
||||||
|
print(f"{expected=}")
|
||||||
|
assert computed == expected
|
||||||
|
print("-" * 80)
|
||||||
|
|
||||||
|
|
||||||
|
# Discretizers under test: uniform and quantile strategies, with 3 and 4 bins
|
||||||
|
# Options shared by every discretizer below; only the bin count and the
# strategy differ between them.
_KBINS_OPTIONS = {"encode": "ordinal", "subsample": 200_000}
clf3u = KBinsDiscretizer(n_bins=3, strategy="uniform", **_KBINS_OPTIONS)
clf3q = KBinsDiscretizer(n_bins=3, strategy="quantile", **_KBINS_OPTIONS)
clf4u = KBinsDiscretizer(n_bins=4, strategy="uniform", **_KBINS_OPTIONS)
clf4q = KBinsDiscretizer(n_bins=4, strategy="quantile", **_KBINS_OPTIONS)
|
||||||
|
#
|
||||||
|
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
|
||||||
|
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2]
|
||||||
|
test(clf3u, X, labels, title="Easy3BinsUniform")
|
||||||
|
test(clf3q, X, labels, title="Easy3BinsQuantile")
|
||||||
|
#
|
||||||
|
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
|
||||||
|
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
|
||||||
|
# In C++ both strategies give the same result, unlike here
|
||||||
|
labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]
|
||||||
|
test(clf3u, X, labels, title="X10BinsUniform")
|
||||||
|
test(clf3q, X, labels2, title="X10BinsQuantile")
|
||||||
|
#
|
||||||
|
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
|
||||||
|
labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
|
||||||
|
# In C++ both strategies give the same result, unlike here
|
||||||
|
# labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]
|
||||||
|
test(clf3u, X, labels, title="X11BinsUniform")
|
||||||
|
test(clf3q, X, labels, title="X11BinsQuantile")
|
||||||
|
#
|
||||||
|
X = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
|
||||||
|
labels = [0, 0, 0, 0, 0, 0]
|
||||||
|
test(clf3u, X, labels, title="ConstantUniform")
|
||||||
|
test(clf3q, X, labels, title="ConstantQuantile")
|
||||||
|
#
|
||||||
|
X = [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||||
|
labels = [2, 0, 0, 2, 0, 0, 2, 0, 0]
|
||||||
|
labels2 = [1, 0, 0, 1, 0, 0, 1, 0, 0]  # same as in C++
|
||||||
|
test(clf3u, X, labels, title="EasyRepeatedUniform")
|
||||||
|
test(clf3q, X, labels2, title="EasyRepeatedQuantile")
|
||||||
|
#
|
||||||
|
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||||
|
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
|
||||||
|
test(clf4u, X, labels, title="Easy4BinsUniform")
|
||||||
|
test(clf4q, X, labels, title="Easy4BinsQuantile")
|
||||||
|
#
|
||||||
|
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||||
|
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
|
||||||
|
test(clf4u, X, labels, title="X13BinsUniform")
|
||||||
|
test(clf4q, X, labels, title="X13BinsQuantile")
|
||||||
|
#
|
||||||
|
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||||
|
labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
|
||||||
|
test(clf4u, X, labels, title="X14BinsUniform")
|
||||||
|
test(clf4q, X, labels, title="X14BinsQuantile")
|
||||||
|
#
|
||||||
|
X1 = [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||||
|
X2 = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||||
|
labels1 = [3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0]
|
||||||
|
labels2 = [3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0]
|
||||||
|
test(clf4u, X1, labels1, title="X15BinsUniform")
|
||||||
|
test(clf4q, X2, labels2, title="X15BinsQuantile")
|
||||||
|
#
|
||||||
|
X = [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||||
|
labels = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3]
|
||||||
|
test(clf4u, X, labels, title="RepeatedValuesUniform")
|
||||||
|
test(clf4q, X, labels, title="RepeatedValuesQuantile")
|
||||||
|
|
||||||
|
print(f"Uniform {clf4u.bin_edges_=}")
|
||||||
|
print(f"Quaintile {clf4q.bin_edges_=}")
|
||||||
|
print("-" * 80)
|
||||||
|
#
|
||||||
|
data, meta = loadarff("tests/datasets/iris.arff")
|
||||||
|
|
||||||
|
labelsu = [
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
]
|
||||||
|
labelsq = [
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
]
|
||||||
|
# test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
|
||||||
|
# test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
|
||||||
|
# Compare the stored reference labels (labelsu / labelsq) with what
# KBinsDiscretizer produces for the iris "sepallength" column, and report how
# many positions differ for each strategy.
sepallength = [[value] for value in data["sepallength"]]
clf4u.fit(sepallength)
clf4q.fit(sepallength)
computedu = clf4u.transform(sepallength)
computedq = clf4q.transform(sepallength)
# Both counts walk the index range of labelsu, as the reference lists are
# compared position by position.
indices = range(len(labelsu))
wrongu = sum(1 for i in indices if labelsu[i] != computedu[i])
wrongq = sum(1 for i in indices if labelsq[i] != computedq[i])
print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Uniform ={wrongu:3d}")
print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Quantile ={wrongq:3d}")
|
@@ -8,11 +8,11 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
typedef float precision_t;
|
typedef float precision_t;
|
||||||
typedef vector<precision_t> samples_t;
|
typedef std::vector<precision_t> samples_t;
|
||||||
typedef vector<int> labels_t;
|
typedef std::vector<int> labels_t;
|
||||||
typedef vector<size_t> indices_t;
|
typedef std::vector<size_t> indices_t;
|
||||||
typedef vector<precision_t> cutPoints_t;
|
typedef std::vector<precision_t> cutPoints_t;
|
||||||
typedef map<pair<int, int>, precision_t> cacheEnt_t;
|
typedef std::map<std::pair<int, int>, precision_t> cacheEnt_t;
|
||||||
typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
|
typedef std::map<std::tuple<int, int, int>, precision_t> cacheIg_t;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user