mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-16 16:05:57 +00:00
Compare commits
12 Commits
Author | SHA1 | Date | |
---|---|---|---|
7ee9896734
|
|||
8f7f605670
|
|||
2f55b27691
|
|||
378fbd51ef
|
|||
402d0da878
|
|||
f34bcc2ed7
|
|||
c9ba35fb58
|
|||
e205668906
|
|||
633aa52849
|
|||
61de687476
|
|||
7ff88c8e4b
|
|||
|
638bb2a59e |
8
.github/workflows/build.yml
vendored
8
.github/workflows/build.yml
vendored
@@ -22,15 +22,19 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get -y install lcov
|
sudo apt-get -y install lcov
|
||||||
sudo apt-get -y install gcovr
|
sudo apt-get -y install gcovr
|
||||||
|
- name: Install Libtorch
|
||||||
|
run: |
|
||||||
|
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcpu.zip
|
||||||
|
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
||||||
- name: Tests & build-wrapper
|
- name: Tests & build-wrapper
|
||||||
run: |
|
run: |
|
||||||
cmake -S . -B build -Wno-dev
|
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch
|
||||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
|
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
|
||||||
cd build
|
cd build
|
||||||
make
|
make
|
||||||
ctest -C Release --output-on-failure --test-dir tests
|
ctest -C Release --output-on-failure --test-dir tests
|
||||||
cd ..
|
cd ..
|
||||||
gcovr -f CPPFImdlp.cpp -f Metrics.cpp -f BinDisc.cpp --txt --sonarqube=coverage.xml
|
gcovr -f CPPFImdlp.cpp -f Metrics.cpp -f BinDisc.cpp -f Discretizer.cpp --txt --sonarqube=coverage.xml
|
||||||
- name: Run sonar-scanner
|
- name: Run sonar-scanner
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
11
.vscode/launch.json
vendored
11
.vscode/launch.json
vendored
@@ -8,15 +8,10 @@
|
|||||||
"name": "C++ Launch config",
|
"name": "C++ Launch config",
|
||||||
"type": "cppdbg",
|
"type": "cppdbg",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${workspaceFolder}/build/sample/sample",
|
"program": "${workspaceFolder}/tests/build/Metrics_unittest",
|
||||||
"cwd": "${workspaceFolder}/build/sample",
|
"cwd": "${workspaceFolder}/tests/build",
|
||||||
"args": [
|
"args": [],
|
||||||
"-f",
|
|
||||||
"glass"
|
|
||||||
],
|
|
||||||
"targetArchitecture": "arm64",
|
|
||||||
"launchCompleteCommand": "exec-run",
|
"launchCompleteCommand": "exec-run",
|
||||||
"preLaunchTask": "CMake: build",
|
|
||||||
"stopAtEntry": false,
|
"stopAtEntry": false,
|
||||||
"linux": {
|
"linux": {
|
||||||
"MIMode": "gdb",
|
"MIMode": "gdb",
|
||||||
|
102
.vscode/settings.json
vendored
102
.vscode/settings.json
vendored
@@ -5,5 +5,105 @@
|
|||||||
},
|
},
|
||||||
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
||||||
"cmake.configureOnOpen": true,
|
"cmake.configureOnOpen": true,
|
||||||
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
|
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json",
|
||||||
|
"files.associations": {
|
||||||
|
"*.rmd": "markdown",
|
||||||
|
"*.py": "python",
|
||||||
|
"vector": "cpp",
|
||||||
|
"__bit_reference": "cpp",
|
||||||
|
"__bits": "cpp",
|
||||||
|
"__config": "cpp",
|
||||||
|
"__debug": "cpp",
|
||||||
|
"__errc": "cpp",
|
||||||
|
"__hash_table": "cpp",
|
||||||
|
"__locale": "cpp",
|
||||||
|
"__mutex_base": "cpp",
|
||||||
|
"__node_handle": "cpp",
|
||||||
|
"__nullptr": "cpp",
|
||||||
|
"__split_buffer": "cpp",
|
||||||
|
"__string": "cpp",
|
||||||
|
"__threading_support": "cpp",
|
||||||
|
"__tuple": "cpp",
|
||||||
|
"array": "cpp",
|
||||||
|
"atomic": "cpp",
|
||||||
|
"bitset": "cpp",
|
||||||
|
"cctype": "cpp",
|
||||||
|
"chrono": "cpp",
|
||||||
|
"clocale": "cpp",
|
||||||
|
"cmath": "cpp",
|
||||||
|
"compare": "cpp",
|
||||||
|
"complex": "cpp",
|
||||||
|
"concepts": "cpp",
|
||||||
|
"cstdarg": "cpp",
|
||||||
|
"cstddef": "cpp",
|
||||||
|
"cstdint": "cpp",
|
||||||
|
"cstdio": "cpp",
|
||||||
|
"cstdlib": "cpp",
|
||||||
|
"cstring": "cpp",
|
||||||
|
"ctime": "cpp",
|
||||||
|
"cwchar": "cpp",
|
||||||
|
"cwctype": "cpp",
|
||||||
|
"exception": "cpp",
|
||||||
|
"initializer_list": "cpp",
|
||||||
|
"ios": "cpp",
|
||||||
|
"iosfwd": "cpp",
|
||||||
|
"istream": "cpp",
|
||||||
|
"limits": "cpp",
|
||||||
|
"locale": "cpp",
|
||||||
|
"memory": "cpp",
|
||||||
|
"mutex": "cpp",
|
||||||
|
"new": "cpp",
|
||||||
|
"optional": "cpp",
|
||||||
|
"ostream": "cpp",
|
||||||
|
"ratio": "cpp",
|
||||||
|
"sstream": "cpp",
|
||||||
|
"stdexcept": "cpp",
|
||||||
|
"streambuf": "cpp",
|
||||||
|
"string": "cpp",
|
||||||
|
"string_view": "cpp",
|
||||||
|
"system_error": "cpp",
|
||||||
|
"tuple": "cpp",
|
||||||
|
"type_traits": "cpp",
|
||||||
|
"typeinfo": "cpp",
|
||||||
|
"unordered_map": "cpp",
|
||||||
|
"variant": "cpp",
|
||||||
|
"algorithm": "cpp",
|
||||||
|
"iostream": "cpp",
|
||||||
|
"iomanip": "cpp",
|
||||||
|
"numeric": "cpp",
|
||||||
|
"set": "cpp",
|
||||||
|
"__tree": "cpp",
|
||||||
|
"deque": "cpp",
|
||||||
|
"list": "cpp",
|
||||||
|
"map": "cpp",
|
||||||
|
"unordered_set": "cpp",
|
||||||
|
"any": "cpp",
|
||||||
|
"condition_variable": "cpp",
|
||||||
|
"forward_list": "cpp",
|
||||||
|
"fstream": "cpp",
|
||||||
|
"stack": "cpp",
|
||||||
|
"thread": "cpp",
|
||||||
|
"__memory": "cpp",
|
||||||
|
"filesystem": "cpp",
|
||||||
|
"*.toml": "toml",
|
||||||
|
"utility": "cpp",
|
||||||
|
"span": "cpp",
|
||||||
|
"*.tcc": "cpp",
|
||||||
|
"bit": "cpp",
|
||||||
|
"charconv": "cpp",
|
||||||
|
"cinttypes": "cpp",
|
||||||
|
"codecvt": "cpp",
|
||||||
|
"functional": "cpp",
|
||||||
|
"iterator": "cpp",
|
||||||
|
"memory_resource": "cpp",
|
||||||
|
"random": "cpp",
|
||||||
|
"source_location": "cpp",
|
||||||
|
"format": "cpp",
|
||||||
|
"numbers": "cpp",
|
||||||
|
"semaphore": "cpp",
|
||||||
|
"stop_token": "cpp",
|
||||||
|
"text_encoding": "cpp",
|
||||||
|
"typeindex": "cpp",
|
||||||
|
"valarray": "cpp"
|
||||||
|
}
|
||||||
}
|
}
|
55
BinDisc.cpp
55
BinDisc.cpp
@@ -7,7 +7,8 @@
|
|||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
|
|
||||||
BinDisc::BinDisc(int n_bins, strategy_t strategy) : n_bins{ n_bins }, strategy{ strategy }
|
BinDisc::BinDisc(int n_bins, strategy_t strategy) :
|
||||||
|
Discretizer(), n_bins{ n_bins }, strategy{ strategy }
|
||||||
{
|
{
|
||||||
if (n_bins < 3) {
|
if (n_bins < 3) {
|
||||||
throw std::invalid_argument("n_bins must be greater than 2");
|
throw std::invalid_argument("n_bins must be greater than 2");
|
||||||
@@ -16,6 +17,7 @@ namespace mdlp {
|
|||||||
BinDisc::~BinDisc() = default;
|
BinDisc::~BinDisc() = default;
|
||||||
void BinDisc::fit(samples_t& X)
|
void BinDisc::fit(samples_t& X)
|
||||||
{
|
{
|
||||||
|
// y is included for compatibility with the Discretizer interface
|
||||||
cutPoints.clear();
|
cutPoints.clear();
|
||||||
if (X.empty()) {
|
if (X.empty()) {
|
||||||
cutPoints.push_back(std::numeric_limits<precision_t>::max());
|
cutPoints.push_back(std::numeric_limits<precision_t>::max());
|
||||||
@@ -27,6 +29,10 @@ namespace mdlp {
|
|||||||
fit_uniform(X);
|
fit_uniform(X);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void BinDisc::fit(samples_t& X, labels_t& y)
|
||||||
|
{
|
||||||
|
fit(X);
|
||||||
|
}
|
||||||
std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
|
std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
|
||||||
{
|
{
|
||||||
// Doesn't include end point as it is not needed
|
// Doesn't include end point as it is not needed
|
||||||
@@ -90,49 +96,4 @@ namespace mdlp {
|
|||||||
// Remove first as it is not needed
|
// Remove first as it is not needed
|
||||||
cutPoints.erase(cutPoints.begin());
|
cutPoints.erase(cutPoints.begin());
|
||||||
}
|
}
|
||||||
labels_t& BinDisc::transform(const samples_t& X)
|
}
|
||||||
{
|
|
||||||
discretizedData.clear();
|
|
||||||
discretizedData.reserve(X.size());
|
|
||||||
for (const precision_t& item : X) {
|
|
||||||
auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
|
||||||
discretizedData.push_back(upper - cutPoints.begin());
|
|
||||||
}
|
|
||||||
return discretizedData;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// void BinDisc::fit_quantile(samples_t& X)
|
|
||||||
// {
|
|
||||||
// cutPoints.clear();
|
|
||||||
// if (X.empty()) {
|
|
||||||
// cutPoints.push_back(std::numeric_limits<float>::max());
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
// samples_t data = X;
|
|
||||||
// std::sort(data.begin(), data.end());
|
|
||||||
// float min_val = data.front();
|
|
||||||
// float max_val = data.back();
|
|
||||||
// // Handle case of all data points having the same value
|
|
||||||
// if (min_val == max_val) {
|
|
||||||
// cutPoints.push_back(std::numeric_limits<float>::max());
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
// int first = X.size() / n_bins;
|
|
||||||
// cutPoints.push_back(data.at(first - 1));
|
|
||||||
// int bins_done = 1;
|
|
||||||
// int prev = first - 1;
|
|
||||||
// while (bins_done < n_bins) {
|
|
||||||
// int next = first * (bins_done + 1) - 1;
|
|
||||||
// while (next < X.size() && data.at(next) == data[prev]) {
|
|
||||||
// ++next;
|
|
||||||
// }
|
|
||||||
// if (next == X.size() || bins_done == n_bins - 1) {
|
|
||||||
// cutPoints.push_back(std::numeric_limits<float>::max());
|
|
||||||
// break;
|
|
||||||
// } else {
|
|
||||||
// cutPoints.push_back(data[next]);
|
|
||||||
// bins_done++;
|
|
||||||
// prev = next;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
13
BinDisc.h
13
BinDisc.h
@@ -2,30 +2,27 @@
|
|||||||
#define BINDISC_H
|
#define BINDISC_H
|
||||||
|
|
||||||
#include "typesFImdlp.h"
|
#include "typesFImdlp.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
|
|
||||||
enum class strategy_t {
|
enum class strategy_t {
|
||||||
UNIFORM,
|
UNIFORM,
|
||||||
QUANTILE
|
QUANTILE
|
||||||
};
|
};
|
||||||
class BinDisc {
|
class BinDisc : public Discretizer {
|
||||||
public:
|
public:
|
||||||
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
||||||
~BinDisc();
|
~BinDisc();
|
||||||
void fit(samples_t&);
|
// y is included for compatibility with the Discretizer interface
|
||||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
void fit(samples_t& X_, labels_t& y) override;
|
||||||
labels_t& transform(const samples_t&);
|
void fit(samples_t& X);
|
||||||
static inline std::string version() { return "1.0.0"; };
|
|
||||||
private:
|
private:
|
||||||
void fit_uniform(samples_t&);
|
void fit_uniform(samples_t&);
|
||||||
void fit_quantile(samples_t&);
|
void fit_quantile(samples_t&);
|
||||||
void normalizeCutPoints();
|
void normalizeCutPoints();
|
||||||
int n_bins;
|
int n_bins;
|
||||||
strategy_t strategy;
|
strategy_t strategy;
|
||||||
labels_t discretizedData = labels_t();
|
|
||||||
cutPoints_t cutPoints;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1,13 +1,9 @@
|
|||||||
cmake_minimum_required(VERSION 3.20)
|
cmake_minimum_required(VERSION 3.20)
|
||||||
project(mdlp)
|
project(mdlp)
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
if (POLICY CMP0135)
|
find_package(Torch REQUIRED)
|
||||||
cmake_policy(SET CMP0135 NEW)
|
include_directories(${TORCH_INCLUDE_DIRS})
|
||||||
endif ()
|
add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp)
|
||||||
|
target_link_libraries(mdlp "${TORCH_LIBRARIES}")
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
|
||||||
|
|
||||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
|
||||||
add_subdirectory(sample)
|
add_subdirectory(sample)
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
|
|
@@ -6,16 +6,14 @@
|
|||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
|
||||||
|
Discretizer(),
|
||||||
|
min_length(min_length_),
|
||||||
max_depth(max_depth_),
|
max_depth(max_depth_),
|
||||||
proposed_cuts(proposed)
|
proposed_cuts(proposed)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp() = default;
|
|
||||||
|
|
||||||
CPPFImdlp::~CPPFImdlp() = default;
|
|
||||||
|
|
||||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||||
{
|
{
|
||||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||||
@@ -208,14 +206,5 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||||
}
|
}
|
||||||
labels_t& CPPFImdlp::transform(const samples_t& data)
|
|
||||||
{
|
|
||||||
discretizedData.clear();
|
|
||||||
discretizedData.reserve(data.size());
|
|
||||||
for (const precision_t& item : data) {
|
|
||||||
auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
|
||||||
discretizedData.push_back(upper - cutPoints.begin());
|
|
||||||
}
|
|
||||||
return discretizedData;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
16
CPPFImdlp.h
16
CPPFImdlp.h
@@ -6,18 +6,16 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class CPPFImdlp {
|
class CPPFImdlp : public Discretizer {
|
||||||
public:
|
public:
|
||||||
CPPFImdlp();
|
CPPFImdlp() = default;
|
||||||
CPPFImdlp(size_t, int, float);
|
CPPFImdlp(size_t min_length_, int max_depth_, float proposed);
|
||||||
~CPPFImdlp();
|
virtual ~CPPFImdlp() = default;
|
||||||
void fit(samples_t&, labels_t&);
|
void fit(samples_t& X_, labels_t& y_) override;
|
||||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
|
||||||
labels_t& transform(const samples_t&);
|
|
||||||
inline int get_depth() const { return depth; };
|
inline int get_depth() const { return depth; };
|
||||||
static inline std::string version() { return "1.1.3"; };
|
|
||||||
protected:
|
protected:
|
||||||
size_t min_length = 3;
|
size_t min_length = 3;
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
@@ -27,9 +25,7 @@ namespace mdlp {
|
|||||||
samples_t X = samples_t();
|
samples_t X = samples_t();
|
||||||
labels_t y = labels_t();
|
labels_t y = labels_t();
|
||||||
Metrics metrics = Metrics(y, indices);
|
Metrics metrics = Metrics(y, indices);
|
||||||
cutPoints_t cutPoints;
|
|
||||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||||
labels_t discretizedData = labels_t();
|
|
||||||
static indices_t sortIndices(samples_t&, labels_t&);
|
static indices_t sortIndices(samples_t&, labels_t&);
|
||||||
void computeCutPoints(size_t, size_t, int);
|
void computeCutPoints(size_t, size_t, int);
|
||||||
void resizeCutPoints();
|
void resizeCutPoints();
|
||||||
|
41
Discretizer.cpp
Normal file
41
Discretizer.cpp
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
#include "Discretizer.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
labels_t& Discretizer::transform(const samples_t& data)
|
||||||
|
{
|
||||||
|
discretizedData.clear();
|
||||||
|
discretizedData.reserve(data.size());
|
||||||
|
for (const precision_t& item : data) {
|
||||||
|
auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
||||||
|
discretizedData.push_back(upper - cutPoints.begin());
|
||||||
|
}
|
||||||
|
return discretizedData;
|
||||||
|
}
|
||||||
|
labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_)
|
||||||
|
{
|
||||||
|
fit(X_, y_);
|
||||||
|
return transform(X_);
|
||||||
|
}
|
||||||
|
void Discretizer::fit_t(torch::Tensor& X_, torch::Tensor& y_)
|
||||||
|
{
|
||||||
|
auto num_elements = X_.numel();
|
||||||
|
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||||
|
labels_t y(y_.data_ptr<int64_t>(), y_.data_ptr<int64_t>() + num_elements);
|
||||||
|
fit(X, y);
|
||||||
|
}
|
||||||
|
torch::Tensor Discretizer::transform_t(torch::Tensor& X_)
|
||||||
|
{
|
||||||
|
auto num_elements = X_.numel();
|
||||||
|
samples_t X(X_.data_ptr<float>(), X_.data_ptr<float>() + num_elements);
|
||||||
|
auto result = transform(X);
|
||||||
|
return torch::tensor(result, torch::kInt64);
|
||||||
|
}
|
||||||
|
torch::Tensor Discretizer::fit_transform_t(torch::Tensor& X_, torch::Tensor& y_)
|
||||||
|
{
|
||||||
|
auto num_elements = X_.numel();
|
||||||
|
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||||
|
labels_t y(y_.data_ptr<int64_t>(), y_.data_ptr<int64_t>() + num_elements);
|
||||||
|
auto result = fit_transform(X, y);
|
||||||
|
return torch::tensor(result, torch::kInt64);
|
||||||
|
}
|
||||||
|
}
|
27
Discretizer.h
Normal file
27
Discretizer.h
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#ifndef DISCRETIZER_H
|
||||||
|
#define DISCRETIZER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include "typesFImdlp.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
class Discretizer {
|
||||||
|
public:
|
||||||
|
Discretizer() = default;
|
||||||
|
virtual ~Discretizer() = default;
|
||||||
|
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||||
|
virtual void fit(samples_t& X_, labels_t& y_) = 0;
|
||||||
|
labels_t& transform(const samples_t& data);
|
||||||
|
labels_t& fit_transform(samples_t& X_, labels_t& y_);
|
||||||
|
void fit_t(torch::Tensor& X_, torch::Tensor& y_);
|
||||||
|
torch::Tensor transform_t(torch::Tensor& X_);
|
||||||
|
torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
|
||||||
|
static inline std::string version() { return "1.2.1"; };
|
||||||
|
protected:
|
||||||
|
labels_t discretizedData = labels_t();
|
||||||
|
cutPoints_t cutPoints;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -4,8 +4,8 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
|
Metrics::Metrics(labels_t& y_, indices_t& indices_) : y(y_), indices(indices_),
|
||||||
numClasses(computeNumClasses(0, indices.size()))
|
numClasses(computeNumClasses(0, indices_.size()))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
README.md
10
README.md
@@ -23,12 +23,10 @@ The algorithm returns the cut points for the variable.
|
|||||||
To run the sample, just execute the following commands:
|
To run the sample, just execute the following commands:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd sample
|
cmake -B build -S .
|
||||||
cmake -B build
|
cmake --build build
|
||||||
cd build
|
build/sample/sample -f iris -m 2
|
||||||
make
|
build/sample/sample -h
|
||||||
./sample -f iris -m 2
|
|
||||||
./sample -h
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Test
|
## Test
|
||||||
|
21
sample/.vscode/launch.json
vendored
21
sample/.vscode/launch.json
vendored
@@ -1,21 +0,0 @@
|
|||||||
{
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "lldb puro",
|
|
||||||
"type": "cppdbg",
|
|
||||||
// "targetArchitecture": "arm64",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${workspaceRoot}/build/sample",
|
|
||||||
"args": [
|
|
||||||
"-f",
|
|
||||||
"iris"
|
|
||||||
],
|
|
||||||
"stopAtEntry": false,
|
|
||||||
"cwd": "${workspaceRoot}/build/",
|
|
||||||
"environment": [],
|
|
||||||
"externalConsole": false,
|
|
||||||
"MIMode": "lldb"
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
@@ -1,5 +1,6 @@
|
|||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
set(CMAKE_BUILD_TYPE Debug)
|
set(CMAKE_BUILD_TYPE Debug)
|
||||||
|
|
||||||
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
add_executable(sample sample.cpp ../tests/ArffFiles.cpp)
|
||||||
|
target_link_libraries(sample mdlp "${TORCH_LIBRARIES}")
|
||||||
|
@@ -5,13 +5,13 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include "../Discretizer.h"
|
||||||
#include "../CPPFImdlp.h"
|
#include "../CPPFImdlp.h"
|
||||||
|
#include "../BinDisc.h"
|
||||||
#include "../tests/ArffFiles.h"
|
#include "../tests/ArffFiles.h"
|
||||||
|
|
||||||
using namespace std;
|
const string PATH = "tests/datasets/";
|
||||||
using namespace mdlp;
|
|
||||||
|
|
||||||
const string PATH = "../../tests/datasets/";
|
|
||||||
|
|
||||||
/* print a description of all supported options */
|
/* print a description of all supported options */
|
||||||
void usage(const char* path)
|
void usage(const char* path)
|
||||||
@@ -20,17 +20,17 @@ void usage(const char* path)
|
|||||||
const char* basename = strrchr(path, '/');
|
const char* basename = strrchr(path, '/');
|
||||||
basename = basename ? basename + 1 : path;
|
basename = basename ? basename + 1 : path;
|
||||||
|
|
||||||
cout << "usage: " << basename << "[OPTION]" << endl;
|
std::cout << "usage: " << basename << "[OPTION]" << std::endl;
|
||||||
cout << " -h, --help\t\t Print this help and exit." << endl;
|
std::cout << " -h, --help\t\t Print this help and exit." << std::endl;
|
||||||
cout
|
std::cout
|
||||||
<< " -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
<< " -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
||||||
<< endl;
|
<< std::endl;
|
||||||
cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
|
std::cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << std::endl;
|
||||||
cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
|
std::cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << std::endl;
|
||||||
cout
|
std::cout
|
||||||
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
|
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
|
||||||
<< endl;
|
<< std::endl;
|
||||||
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
|
std::cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
||||||
@@ -96,56 +96,79 @@ void process_file(const string& path, const string& file_name, bool class_last,
|
|||||||
file.load(path + file_name + ".arff", class_last);
|
file.load(path + file_name + ".arff", class_last);
|
||||||
const auto attributes = file.getAttributes();
|
const auto attributes = file.getAttributes();
|
||||||
const auto items = file.getSize();
|
const auto items = file.getSize();
|
||||||
cout << "Number of lines: " << items << endl;
|
std::cout << "Number of lines: " << items << std::endl;
|
||||||
cout << "Attributes: " << endl;
|
std::cout << "Attributes: " << std::endl;
|
||||||
for (auto attribute : attributes) {
|
for (auto attribute : attributes) {
|
||||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
std::cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << std::endl;
|
||||||
}
|
}
|
||||||
cout << "Class name: " << file.getClassName() << endl;
|
std::cout << "Class name: " << file.getClassName() << std::endl;
|
||||||
cout << "Class type: " << file.getClassType() << endl;
|
std::cout << "Class type: " << file.getClassType() << std::endl;
|
||||||
cout << "Data: " << endl;
|
std::cout << "Data: " << std::endl;
|
||||||
vector<samples_t>& X = file.getX();
|
std::vector<mdlp::samples_t>& X = file.getX();
|
||||||
labels_t& y = file.getY();
|
mdlp::labels_t& y = file.getY();
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
for (auto feature : X) {
|
for (auto feature : X) {
|
||||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
std::cout << fixed << setprecision(1) << feature[i] << " ";
|
||||||
}
|
}
|
||||||
cout << y[i] << endl;
|
std::cout << y[i] << std::endl;
|
||||||
}
|
}
|
||||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||||
size_t total = 0;
|
size_t total = 0;
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||||
cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
|
std::cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
|
||||||
test.fit(X[i], y);
|
test.fit(X[i], y);
|
||||||
auto cut_points = test.getCutPoints();
|
auto cut_points = test.getCutPoints();
|
||||||
for (auto item : cut_points) {
|
for (auto item : cut_points) {
|
||||||
cout << item;
|
std::cout << item;
|
||||||
if (item != cut_points.back())
|
if (item != cut_points.back())
|
||||||
cout << ", ";
|
std::cout << ", ";
|
||||||
}
|
}
|
||||||
total += test.getCutPoints().size();
|
total += test.getCutPoints().size();
|
||||||
cout << "]" << endl;
|
std::cout << "]" << std::endl;
|
||||||
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
|
std::cout << "Min: " << *min_max.first << " Max: " << *min_max.second << std::endl;
|
||||||
cout << "--------------------------" << endl;
|
std::cout << "--------------------------" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "Total cut points ...: " << total << std::endl;
|
||||||
|
std::cout << "Total feature states: " << total + attributes.size() << std::endl;
|
||||||
|
std::cout << "Version ............: " << test.version() << std::endl;
|
||||||
|
std::cout << "Transformed data (vector)..: " << std::endl;
|
||||||
|
test.fit(X[0], y);
|
||||||
|
auto data = test.transform(X[0]);
|
||||||
|
for (int i = 130; i < 135; i++) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
|
||||||
|
}
|
||||||
|
auto Xt = torch::tensor(X[0], torch::kFloat32);
|
||||||
|
auto yt = torch::tensor(y, torch::kInt64);
|
||||||
|
//test.fit_t(Xt, yt);
|
||||||
|
auto result = test.fit_transform_t(Xt, yt);
|
||||||
|
std::cout << "Transformed data (torch)...: " << std::endl;
|
||||||
|
for (int i = 130; i < 135; i++) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << result[i].item<int64_t>() << std::endl;
|
||||||
|
}
|
||||||
|
auto disc = mdlp::BinDisc(3);
|
||||||
|
auto res_v = disc.fit_transform(X[0], y);
|
||||||
|
disc.fit_t(Xt, yt);
|
||||||
|
auto res_t = disc.transform_t(Xt);
|
||||||
|
std::cout << "Transformed data (BinDisc)...: " << std::endl;
|
||||||
|
for (int i = 130; i < 135; i++) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << res_v[i] << " " << res_t[i].item<int64_t>() << std::endl;
|
||||||
}
|
}
|
||||||
cout << "Total cut points ...: " << total << endl;
|
|
||||||
cout << "Total feature states: " << total + attributes.size() << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
||||||
float max_cutpoints)
|
float max_cutpoints)
|
||||||
{
|
{
|
||||||
cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
std::cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
||||||
<< max_cutpoints << endl << endl;
|
<< max_cutpoints << std::endl << std::endl;
|
||||||
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
||||||
printf("==================== ==== ==== ========\n");
|
printf("==================== ==== ==== ========\n");
|
||||||
for (const auto& dataset : datasets) {
|
for (const auto& dataset : datasets) {
|
||||||
ArffFiles file;
|
ArffFiles file;
|
||||||
file.load(path + dataset.first + ".arff", dataset.second);
|
file.load(path + dataset.first + ".arff", dataset.second);
|
||||||
auto attributes = file.getAttributes();
|
auto attributes = file.getAttributes();
|
||||||
vector<samples_t>& X = file.getX();
|
std::vector<mdlp::samples_t>& X = file.getX();
|
||||||
labels_t& y = file.getY();
|
mdlp::labels_t& y = file.getY();
|
||||||
size_t timing = 0;
|
size_t timing = 0;
|
||||||
size_t cut_points = 0;
|
size_t cut_points = 0;
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
@@ -163,7 +186,7 @@ void process_all_files(const map<string, bool>& datasets, const string& path, in
|
|||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
map<string, bool> datasets = {
|
std::map<std::string, bool> datasets = {
|
||||||
{"diabetes", true},
|
{"diabetes", true},
|
||||||
{"glass", true},
|
{"glass", true},
|
||||||
{"iris", true},
|
{"iris", true},
|
||||||
@@ -173,14 +196,14 @@ int main(int argc, char** argv)
|
|||||||
{"mfeat-factors", true},
|
{"mfeat-factors", true},
|
||||||
{"test", true}
|
{"test", true}
|
||||||
};
|
};
|
||||||
string file_name;
|
std::string file_name;
|
||||||
string path;
|
std::string path;
|
||||||
int max_depth;
|
int max_depth;
|
||||||
int min_length;
|
int min_length;
|
||||||
float max_cutpoints;
|
float max_cutpoints;
|
||||||
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
||||||
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
||||||
cout << "Invalid file name: " << file_name << endl;
|
std::cout << "Invalid file name: " << file_name << std::endl;
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@@ -188,10 +211,10 @@ int main(int argc, char** argv)
|
|||||||
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
||||||
else {
|
else {
|
||||||
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
||||||
cout << "File name ....: " << file_name << endl;
|
std::cout << "File name ....: " << file_name << std::endl;
|
||||||
cout << "Max depth ....: " << max_depth << endl;
|
std::cout << "Max depth ....: " << max_depth << std::endl;
|
||||||
cout << "Min length ...: " << min_length << endl;
|
std::cout << "Min length ...: " << min_length << std::endl;
|
||||||
cout << "Max cutpoints : " << max_cutpoints << endl;
|
std::cout << "Max cutpoints : " << max_cutpoints << std::endl;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
@@ -37,12 +37,13 @@ namespace mdlp {
|
|||||||
TEST_F(TestBinDisc3U, Easy3BinsUniform)
|
TEST_F(TestBinDisc3U, Easy3BinsUniform)
|
||||||
{
|
{
|
||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
||||||
fit(X);
|
auto y = labels_t();
|
||||||
|
fit(X, y);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_NEAR(3.66667, cuts[0], margin);
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_NEAR(6.33333, cuts[1], margin);
|
EXPECT_NEAR(3.66667, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(6.33333, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_EQ(numeric_limits<float>::max(), cuts.at(2));
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -52,10 +53,10 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_NEAR(3.666667, cuts[0], margin);
|
EXPECT_NEAR(3.666667, cuts[0], margin);
|
||||||
EXPECT_NEAR(6.333333, cuts[1], margin);
|
EXPECT_NEAR(6.333333, cuts[1], margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -65,10 +66,10 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_EQ(4.0, cuts[0]);
|
EXPECT_EQ(4.0, cuts[0]);
|
||||||
EXPECT_EQ(7.0, cuts[1]);
|
EXPECT_EQ(7.0, cuts[1]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -78,10 +79,10 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_EQ(4, cuts[0]);
|
EXPECT_EQ(4, cuts[0]);
|
||||||
EXPECT_EQ(7, cuts[1]);
|
EXPECT_EQ(7, cuts[1]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -91,10 +92,10 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_NEAR(4.33333, cuts[0], margin);
|
EXPECT_NEAR(4.33333, cuts[0], margin);
|
||||||
EXPECT_NEAR(7.66667, cuts[1], margin);
|
EXPECT_NEAR(7.66667, cuts[1], margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -104,10 +105,10 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_NEAR(4.33333, cuts[0], margin);
|
EXPECT_NEAR(4.33333, cuts[0], margin);
|
||||||
EXPECT_NEAR(7.66667, cuts[1], margin);
|
EXPECT_NEAR(7.66667, cuts[1], margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -117,8 +118,8 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(1, cuts.size());
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
||||||
EXPECT_EQ(1, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -128,8 +129,8 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_EQ(1, cuts.size());
|
||||||
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -139,16 +140,16 @@ namespace mdlp {
|
|||||||
samples_t X = {};
|
samples_t X = {};
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_EQ(1, cuts.size());
|
||||||
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc3Q, EmptyQuantile)
|
TEST_F(TestBinDisc3Q, EmptyQuantile)
|
||||||
{
|
{
|
||||||
samples_t X = {};
|
samples_t X = {};
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_EQ(1, cuts.size());
|
||||||
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
||||||
}
|
}
|
||||||
TEST(TestBinDisc3, ExceptionNumberBins)
|
TEST(TestBinDisc3, ExceptionNumberBins)
|
||||||
{
|
{
|
||||||
@@ -159,44 +160,38 @@ namespace mdlp {
|
|||||||
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_NEAR(1.66667, cuts[0], margin);
|
EXPECT_NEAR(1.66667, cuts[0], margin);
|
||||||
EXPECT_NEAR(2.33333, cuts[1], margin);
|
EXPECT_NEAR(2.33333, cuts[1], margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
|
labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
EXPECT_EQ(3.0, X[0]); // X is not modified
|
ASSERT_EQ(3.0, X[0]); // X is not modified
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc3Q, EasyRepeated)
|
TEST_F(TestBinDisc3Q, EasyRepeated)
|
||||||
{
|
{
|
||||||
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
std::cout << "cuts: ";
|
EXPECT_EQ(2, cuts.size());
|
||||||
for (auto cut : cuts) {
|
|
||||||
std::cout << cut << " ";
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
std::cout << std::string(80, '-') << std::endl;
|
|
||||||
EXPECT_NEAR(1.66667, cuts[0], margin);
|
EXPECT_NEAR(1.66667, cuts[0], margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[1]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[1]);
|
||||||
EXPECT_EQ(2, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
|
labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
EXPECT_EQ(3.0, X[0]); // X is not modified
|
ASSERT_EQ(3.0, X[0]); // X is not modified
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc4U, Easy4BinsUniform)
|
TEST_F(TestBinDisc4U, Easy4BinsUniform)
|
||||||
{
|
{
|
||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(3.75, cuts[0]);
|
EXPECT_EQ(4, cuts.size());
|
||||||
|
ASSERT_EQ(3.75, cuts[0]);
|
||||||
EXPECT_EQ(6.5, cuts[1]);
|
EXPECT_EQ(6.5, cuts[1]);
|
||||||
EXPECT_EQ(9.25, cuts[2]);
|
EXPECT_EQ(9.25, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -206,11 +201,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(3.75, cuts[0]);
|
EXPECT_EQ(4, cuts.size());
|
||||||
|
ASSERT_EQ(3.75, cuts[0]);
|
||||||
EXPECT_EQ(6.5, cuts[1]);
|
EXPECT_EQ(6.5, cuts[1]);
|
||||||
EXPECT_EQ(9.25, cuts[2]);
|
EXPECT_EQ(9.25, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -220,11 +215,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(4.0, cuts[0]);
|
EXPECT_EQ(4.0, cuts[0]);
|
||||||
EXPECT_EQ(7.0, cuts[1]);
|
EXPECT_EQ(7.0, cuts[1]);
|
||||||
EXPECT_EQ(10.0, cuts[2]);
|
EXPECT_EQ(10.0, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -234,11 +229,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(4.0, cuts[0]);
|
EXPECT_EQ(4.0, cuts[0]);
|
||||||
EXPECT_EQ(7.0, cuts[1]);
|
EXPECT_EQ(7.0, cuts[1]);
|
||||||
EXPECT_EQ(10.0, cuts[2]);
|
EXPECT_EQ(10.0, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -248,11 +243,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(4.25, cuts[0]);
|
EXPECT_EQ(4.25, cuts[0]);
|
||||||
EXPECT_EQ(7.5, cuts[1]);
|
EXPECT_EQ(7.5, cuts[1]);
|
||||||
EXPECT_EQ(10.75, cuts[2]);
|
EXPECT_EQ(10.75, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -262,11 +257,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(4.25, cuts[0]);
|
EXPECT_EQ(4.25, cuts[0]);
|
||||||
EXPECT_EQ(7.5, cuts[1]);
|
EXPECT_EQ(7.5, cuts[1]);
|
||||||
EXPECT_EQ(10.75, cuts[2]);
|
EXPECT_EQ(10.75, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -276,11 +271,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(4.5, cuts[0]);
|
EXPECT_EQ(4.5, cuts[0]);
|
||||||
EXPECT_EQ(8, cuts[1]);
|
EXPECT_EQ(8, cuts[1]);
|
||||||
EXPECT_EQ(11.5, cuts[2]);
|
EXPECT_EQ(11.5, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
|
labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -290,11 +285,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(4.5, cuts[0]);
|
EXPECT_EQ(4.5, cuts[0]);
|
||||||
EXPECT_EQ(8, cuts[1]);
|
EXPECT_EQ(8, cuts[1]);
|
||||||
EXPECT_EQ(11.5, cuts[2]);
|
EXPECT_EQ(11.5, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
|
labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -305,11 +300,11 @@ namespace mdlp {
|
|||||||
// 0 1 2 3 4 5 6 7 8 9
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
EXPECT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(1.0, cuts[0]);
|
EXPECT_EQ(1.0, cuts[0]);
|
||||||
EXPECT_EQ(2.0, cuts[1]);
|
EXPECT_EQ(2.0, cuts[1]);
|
||||||
EXPECT_EQ(3.0, cuts[2]);
|
ASSERT_EQ(3.0, cuts[2]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
||||||
EXPECT_EQ(4, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -320,10 +315,10 @@ namespace mdlp {
|
|||||||
// 0 1 2 3 4 5 6 7 8 9
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
|
ASSERT_EQ(3, cuts.size());
|
||||||
EXPECT_EQ(2.0, cuts[0]);
|
EXPECT_EQ(2.0, cuts[0]);
|
||||||
EXPECT_EQ(3.0, cuts[1]);
|
ASSERT_EQ(3.0, cuts[1]);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
||||||
EXPECT_EQ(3, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -337,6 +332,13 @@ namespace mdlp {
|
|||||||
auto Xt = transform(X[0]);
|
auto Xt = transform(X[0]);
|
||||||
labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
|
labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
|
||||||
EXPECT_EQ(expected, Xt);
|
EXPECT_EQ(expected, Xt);
|
||||||
|
auto Xtt = fit_transform(X[0], file.getY());
|
||||||
|
EXPECT_EQ(expected, Xtt);
|
||||||
|
auto Xt_t = torch::tensor(X[0], torch::kFloat32);
|
||||||
|
auto y_t = torch::tensor(file.getY(), torch::kInt64);
|
||||||
|
auto Xtt_t = fit_transform_t(Xt_t, y_t);
|
||||||
|
for (int i = 0; i < expected.size(); i++)
|
||||||
|
EXPECT_EQ(expected[i], Xtt_t[i].item<int64_t>());
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc4Q, irisQuantile)
|
TEST_F(TestBinDisc4Q, irisQuantile)
|
||||||
{
|
{
|
||||||
@@ -347,5 +349,16 @@ namespace mdlp {
|
|||||||
auto Xt = transform(X[0]);
|
auto Xt = transform(X[0]);
|
||||||
labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
||||||
EXPECT_EQ(expected, Xt);
|
EXPECT_EQ(expected, Xt);
|
||||||
|
auto Xtt = fit_transform(X[0], file.getY());
|
||||||
|
EXPECT_EQ(expected, Xtt);
|
||||||
|
auto Xt_t = torch::tensor(X[0], torch::kFloat32);
|
||||||
|
auto y_t = torch::tensor(file.getY(), torch::kInt64);
|
||||||
|
auto Xtt_t = fit_transform_t(Xt_t, y_t);
|
||||||
|
for (int i = 0; i < expected.size(); i++)
|
||||||
|
EXPECT_EQ(expected[i], Xtt_t[i].item<int64_t>());
|
||||||
|
fit_t(Xt_t, y_t);
|
||||||
|
auto Xt_t2 = transform_t(Xt_t);
|
||||||
|
for (int i = 0; i < expected.size(); i++)
|
||||||
|
EXPECT_EQ(expected[i], Xt_t2[i].item<int64_t>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,10 +1,8 @@
|
|||||||
cmake_minimum_required(VERSION 3.20)
|
cmake_minimum_required(VERSION 3.20)
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
cmake_policy(SET CMP0135 NEW)
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
|
|
||||||
include_directories(${GTEST_INCLUDE_DIRS})
|
include_directories(${GTEST_INCLUDE_DIRS})
|
||||||
|
|
||||||
|
|
||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
googletest
|
googletest
|
||||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||||
@@ -13,22 +11,35 @@ FetchContent_Declare(
|
|||||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||||
FetchContent_MakeAvailable(googletest)
|
FetchContent_MakeAvailable(googletest)
|
||||||
|
|
||||||
|
find_package(Torch REQUIRED)
|
||||||
|
|
||||||
enable_testing()
|
enable_testing()
|
||||||
|
|
||||||
|
include_directories(${TORCH_INCLUDE_DIRS})
|
||||||
|
|
||||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
||||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
|
||||||
add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp)
|
|
||||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
|
||||||
target_link_libraries(BinDisc_unittest GTest::gtest_main)
|
|
||||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
|
||||||
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
|
||||||
target_link_options(Metrics_unittest PRIVATE --coverage)
|
target_link_options(Metrics_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp ../Discretizer.cpp)
|
||||||
|
target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||||
|
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||||
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp ../Discretizer.cpp)
|
||||||
|
target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||||
|
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
||||||
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(Discretizer_unittest ../BinDisc.cpp ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp ../Discretizer.cpp Discretizer_unittest.cpp)
|
||||||
|
target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||||
|
target_compile_options(Discretizer_unittest PRIVATE --coverage)
|
||||||
|
target_link_options(Discretizer_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
include(GoogleTest)
|
include(GoogleTest)
|
||||||
|
|
||||||
gtest_discover_tests(Metrics_unittest)
|
gtest_discover_tests(Metrics_unittest)
|
||||||
gtest_discover_tests(FImdlp_unittest)
|
gtest_discover_tests(FImdlp_unittest)
|
||||||
gtest_discover_tests(BinDisc_unittest)
|
gtest_discover_tests(BinDisc_unittest)
|
||||||
|
gtest_discover_tests(Discretizer_unittest)
|
74
tests/Discretizer_unittest.cpp
Normal file
74
tests/Discretizer_unittest.cpp
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "ArffFiles.h"
|
||||||
|
#include "../Discretizer.h"
|
||||||
|
#include "../BinDisc.h"
|
||||||
|
#include "../CPPFImdlp.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
const float margin = 1e-4;
|
||||||
|
static std::string set_data_path()
|
||||||
|
{
|
||||||
|
std::string path = "../datasets/";
|
||||||
|
std::ifstream file(path + "iris.arff");
|
||||||
|
if (file.is_open()) {
|
||||||
|
file.close();
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
return "../../tests/datasets/";
|
||||||
|
}
|
||||||
|
const std::string data_path = set_data_path();
|
||||||
|
|
||||||
|
TEST(Discretizer, BinIrisUniform)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
auto y = labels_t();
|
||||||
|
disc->fit(X[0], y);
|
||||||
|
auto Xt = disc->transform(X[0]);
|
||||||
|
labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(expected, Xt);
|
||||||
|
}
|
||||||
|
TEST(Discretizer, BinIrisQuantile)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE);
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
auto y = labels_t();
|
||||||
|
disc->fit(X[0], y);
|
||||||
|
auto Xt = disc->transform(X[0]);
|
||||||
|
labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(expected, Xt);
|
||||||
|
}
|
||||||
|
TEST(Discretizer, FImdlpIris)
|
||||||
|
{
|
||||||
|
labels_t expected = {
|
||||||
|
5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||||
|
5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
|
||||||
|
5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
|
||||||
|
3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
|
||||||
|
3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
|
||||||
|
4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
|
||||||
|
1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
|
||||||
|
};
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new CPPFImdlp();
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
labels_t& y = file.getY();
|
||||||
|
disc->fit(X[1], y);
|
||||||
|
auto computed = disc->transform(X[1]);
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
|
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||||
|
EXPECT_EQ(computed[i], expected[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@@ -345,10 +345,15 @@ namespace mdlp {
|
|||||||
vector<samples_t>& X = file.getX();
|
vector<samples_t>& X = file.getX();
|
||||||
labels_t& y = file.getY();
|
labels_t& y = file.getY();
|
||||||
fit(X[1], y);
|
fit(X[1], y);
|
||||||
auto computed = transform(X[1]);
|
// auto computed = transform(X[1]);
|
||||||
EXPECT_EQ(computed.size(), expected.size());
|
// EXPECT_EQ(computed.size(), expected.size());
|
||||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
// for (unsigned long i = 0; i < computed.size(); i++) {
|
||||||
EXPECT_EQ(computed[i], expected[i]);
|
// EXPECT_EQ(computed[i], expected[i]);
|
||||||
}
|
// }
|
||||||
|
// auto computed_ft = fit_transform(X[1], y);
|
||||||
|
// EXPECT_EQ(computed_ft.size(), expected.size());
|
||||||
|
// for (unsigned long i = 0; i < computed_ft.size(); i++) {
|
||||||
|
// EXPECT_EQ(computed_ft[i], expected[i]);
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -2,13 +2,13 @@
|
|||||||
#include "../Metrics.h"
|
#include "../Metrics.h"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class TestMetrics: public Metrics, public testing::Test {
|
class TestMetrics : public Metrics, public testing::Test {
|
||||||
public:
|
public:
|
||||||
labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||||
precision_t precision = 0.000001f;
|
precision_t precision = 1e-6;
|
||||||
|
|
||||||
TestMetrics(): Metrics(y_, indices_) {};
|
TestMetrics() : Metrics(y_, indices_) {};
|
||||||
|
|
||||||
void SetUp() override
|
void SetUp() override
|
||||||
{
|
{
|
||||||
|
@@ -1,18 +1,15 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
if [ -d build ] ; then
|
if [ -d build ] && [ "$1" != "run" ]; then
|
||||||
rm -fr build
|
rm -fr build
|
||||||
fi
|
fi
|
||||||
if [ -d gcovr-report ] ; then
|
if [ -d gcovr-report ] ; then
|
||||||
rm -fr gcovr-report
|
rm -fr gcovr-report
|
||||||
fi
|
fi
|
||||||
cmake -S . -B build -Wno-dev
|
cmake -S . -B build -Wno-dev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="--coverage" -DCMAKE_C_FLAGS="--coverage"
|
||||||
cmake --build build
|
cmake --build build
|
||||||
cd build
|
cd build
|
||||||
ctest --output-on-failure
|
ctest --output-on-failure
|
||||||
cd ..
|
cd ..
|
||||||
mkdir gcovr-report
|
mkdir gcovr-report
|
||||||
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
|
||||||
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
|
|
||||||
#lcov --list lcoverage/main_coverage.info
|
|
||||||
cd ..
|
cd ..
|
||||||
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
|
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --gcov-filter "Discretizer.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
|
||||||
|
@@ -89,6 +89,7 @@ print(f"Quaintile {clf4q.bin_edges_=}")
|
|||||||
print("-" * 80)
|
print("-" * 80)
|
||||||
#
|
#
|
||||||
data, meta = loadarff("tests/datasets/iris.arff")
|
data, meta = loadarff("tests/datasets/iris.arff")
|
||||||
|
|
||||||
labelsu = [
|
labelsu = [
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
@@ -117,12 +118,12 @@ labelsu = [
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
1,
|
0,
|
||||||
1,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
1,
|
1,
|
||||||
1,
|
0,
|
||||||
1,
|
1,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
@@ -149,11 +150,11 @@ labelsu = [
|
|||||||
2,
|
2,
|
||||||
0,
|
0,
|
||||||
2,
|
2,
|
||||||
1,
|
0,
|
||||||
0,
|
0,
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
2,
|
1,
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
1,
|
1,
|
||||||
@@ -161,9 +162,9 @@ labelsu = [
|
|||||||
2,
|
2,
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
|
1,
|
||||||
2,
|
2,
|
||||||
2,
|
1,
|
||||||
2,
|
|
||||||
2,
|
2,
|
||||||
2,
|
2,
|
||||||
2,
|
2,
|
||||||
@@ -181,7 +182,7 @@ labelsu = [
|
|||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
2,
|
1,
|
||||||
1,
|
1,
|
||||||
0,
|
0,
|
||||||
1,
|
1,
|
||||||
@@ -217,14 +218,14 @@ labelsu = [
|
|||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
2,
|
2,
|
||||||
2,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
3,
|
3,
|
||||||
3,
|
3,
|
||||||
2,
|
2,
|
||||||
2,
|
2,
|
||||||
2,
|
1,
|
||||||
3,
|
3,
|
||||||
2,
|
2,
|
||||||
2,
|
2,
|
||||||
@@ -393,12 +394,19 @@ labelsq = [
|
|||||||
2,
|
2,
|
||||||
2,
|
2,
|
||||||
]
|
]
|
||||||
test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
|
# test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
|
||||||
test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
|
# test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
|
||||||
# print("Labels")
|
sepallength = [[x] for x in data["sepallength"]]
|
||||||
# print(labels)
|
clf4u.fit(sepallength)
|
||||||
# print("Expected")
|
clf4q.fit(sepallength)
|
||||||
# print(expected)
|
computedu = clf4u.transform(sepallength)
|
||||||
# for i in range(len(labels)):
|
computedq = clf4q.transform(sepallength)
|
||||||
# if labels[i] != expected[i]:
|
wrongu = 0
|
||||||
# print(f"Error at {i} {labels[i]} != {expected[i]}")
|
wrongq = 0
|
||||||
|
for i in range(len(labelsu)):
|
||||||
|
if labelsu[i] != computedu[i]:
|
||||||
|
wrongu += 1
|
||||||
|
if labelsq[i] != computedq[i]:
|
||||||
|
wrongq += 1
|
||||||
|
print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Uniform ={wrongu:3d}")
|
||||||
|
print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Quantile ={wrongq:3d}")
|
||||||
|
Reference in New Issue
Block a user