mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-17 00:15:59 +00:00
Compare commits
26 Commits
Author | SHA1 | Date | |
---|---|---|---|
ab12622009
|
|||
248a511972
|
|||
d9bd0126f9
|
|||
210af46a88
|
|||
2db60e007d
|
|||
1cf245fa49
|
|||
|
e36d9af8f9 | ||
7b0673fd4b
|
|||
a1346e1943
|
|||
b3fc598c29
|
|||
cc1efa0b4e
|
|||
90965877eb
|
|||
c4e6c041fe
|
|||
7938df7f0f
|
|||
7ee9896734
|
|||
8f7f605670
|
|||
2f55b27691
|
|||
378fbd51ef
|
|||
402d0da878
|
|||
f34bcc2ed7
|
|||
c9ba35fb58
|
|||
e205668906
|
|||
633aa52849
|
|||
61de687476
|
|||
7ff88c8e4b
|
|||
|
638bb2a59e |
22
.github/workflows/build.yml
vendored
22
.github/workflows/build.yml
vendored
@@ -13,28 +13,32 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4.1.6
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||||
|
submodules: recursive
|
||||||
- name: Install sonar-scanner and build-wrapper
|
- name: Install sonar-scanner and build-wrapper
|
||||||
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
||||||
- name: Install lcov & gcovr
|
- name: Install lcov & gcovr
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -y install lcov
|
sudo apt-get -y install lcov
|
||||||
sudo apt-get -y install gcovr
|
sudo apt-get -y install gcovr
|
||||||
|
- name: Install Libtorch
|
||||||
|
run: |
|
||||||
|
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcpu.zip
|
||||||
|
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
||||||
- name: Tests & build-wrapper
|
- name: Tests & build-wrapper
|
||||||
run: |
|
run: |
|
||||||
cmake -S . -B build -Wno-dev
|
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON
|
||||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
|
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Debug
|
||||||
|
cmake --build build -j 4
|
||||||
cd build
|
cd build
|
||||||
make
|
ctest -C Debug --output-on-failure -j 4
|
||||||
ctest -C Release --output-on-failure --test-dir tests
|
gcovr -f ../src/CPPFImdlp.cpp -f ../src/Metrics.cpp -f ../src/BinDisc.cpp -f ../src/Discretizer.cpp --txt --sonarqube=coverage.xml
|
||||||
cd ..
|
|
||||||
gcovr -f CPPFImdlp.cpp -f Metrics.cpp -f BinDisc.cpp --txt --sonarqube=coverage.xml
|
|
||||||
- name: Run sonar-scanner
|
- name: Run sonar-scanner
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
sonar-scanner --define sonar.cfamily.compile-commands="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
||||||
--define sonar.coverageReportPaths=coverage.xml
|
--define sonar.coverageReportPaths=build/coverage.xml
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -33,6 +33,8 @@
|
|||||||
**/build
|
**/build
|
||||||
build_Debug
|
build_Debug
|
||||||
build_Release
|
build_Release
|
||||||
|
build_debug
|
||||||
|
build_release
|
||||||
**/lcoverage
|
**/lcoverage
|
||||||
.idea
|
.idea
|
||||||
cmake-*
|
cmake-*
|
||||||
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[submodule "tests/lib/Files"]
|
||||||
|
path = tests/lib/Files
|
||||||
|
url = https://github.com/rmontanana/ArffFiles.git
|
11
.vscode/launch.json
vendored
11
.vscode/launch.json
vendored
@@ -8,15 +8,10 @@
|
|||||||
"name": "C++ Launch config",
|
"name": "C++ Launch config",
|
||||||
"type": "cppdbg",
|
"type": "cppdbg",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${workspaceFolder}/build/sample/sample",
|
"program": "${workspaceFolder}/tests/build/BinDisc_unittest",
|
||||||
"cwd": "${workspaceFolder}/build/sample",
|
"cwd": "${workspaceFolder}/tests/build",
|
||||||
"args": [
|
"args": [],
|
||||||
"-f",
|
|
||||||
"glass"
|
|
||||||
],
|
|
||||||
"targetArchitecture": "arm64",
|
|
||||||
"launchCompleteCommand": "exec-run",
|
"launchCompleteCommand": "exec-run",
|
||||||
"preLaunchTask": "CMake: build",
|
|
||||||
"stopAtEntry": false,
|
"stopAtEntry": false,
|
||||||
"linux": {
|
"linux": {
|
||||||
"MIMode": "gdb",
|
"MIMode": "gdb",
|
||||||
|
102
.vscode/settings.json
vendored
102
.vscode/settings.json
vendored
@@ -5,5 +5,105 @@
|
|||||||
},
|
},
|
||||||
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
||||||
"cmake.configureOnOpen": true,
|
"cmake.configureOnOpen": true,
|
||||||
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
|
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json",
|
||||||
|
"files.associations": {
|
||||||
|
"*.rmd": "markdown",
|
||||||
|
"*.py": "python",
|
||||||
|
"vector": "cpp",
|
||||||
|
"__bit_reference": "cpp",
|
||||||
|
"__bits": "cpp",
|
||||||
|
"__config": "cpp",
|
||||||
|
"__debug": "cpp",
|
||||||
|
"__errc": "cpp",
|
||||||
|
"__hash_table": "cpp",
|
||||||
|
"__locale": "cpp",
|
||||||
|
"__mutex_base": "cpp",
|
||||||
|
"__node_handle": "cpp",
|
||||||
|
"__nullptr": "cpp",
|
||||||
|
"__split_buffer": "cpp",
|
||||||
|
"__string": "cpp",
|
||||||
|
"__threading_support": "cpp",
|
||||||
|
"__tuple": "cpp",
|
||||||
|
"array": "cpp",
|
||||||
|
"atomic": "cpp",
|
||||||
|
"bitset": "cpp",
|
||||||
|
"cctype": "cpp",
|
||||||
|
"chrono": "cpp",
|
||||||
|
"clocale": "cpp",
|
||||||
|
"cmath": "cpp",
|
||||||
|
"compare": "cpp",
|
||||||
|
"complex": "cpp",
|
||||||
|
"concepts": "cpp",
|
||||||
|
"cstdarg": "cpp",
|
||||||
|
"cstddef": "cpp",
|
||||||
|
"cstdint": "cpp",
|
||||||
|
"cstdio": "cpp",
|
||||||
|
"cstdlib": "cpp",
|
||||||
|
"cstring": "cpp",
|
||||||
|
"ctime": "cpp",
|
||||||
|
"cwchar": "cpp",
|
||||||
|
"cwctype": "cpp",
|
||||||
|
"exception": "cpp",
|
||||||
|
"initializer_list": "cpp",
|
||||||
|
"ios": "cpp",
|
||||||
|
"iosfwd": "cpp",
|
||||||
|
"istream": "cpp",
|
||||||
|
"limits": "cpp",
|
||||||
|
"locale": "cpp",
|
||||||
|
"memory": "cpp",
|
||||||
|
"mutex": "cpp",
|
||||||
|
"new": "cpp",
|
||||||
|
"optional": "cpp",
|
||||||
|
"ostream": "cpp",
|
||||||
|
"ratio": "cpp",
|
||||||
|
"sstream": "cpp",
|
||||||
|
"stdexcept": "cpp",
|
||||||
|
"streambuf": "cpp",
|
||||||
|
"string": "cpp",
|
||||||
|
"string_view": "cpp",
|
||||||
|
"system_error": "cpp",
|
||||||
|
"tuple": "cpp",
|
||||||
|
"type_traits": "cpp",
|
||||||
|
"typeinfo": "cpp",
|
||||||
|
"unordered_map": "cpp",
|
||||||
|
"variant": "cpp",
|
||||||
|
"algorithm": "cpp",
|
||||||
|
"iostream": "cpp",
|
||||||
|
"iomanip": "cpp",
|
||||||
|
"numeric": "cpp",
|
||||||
|
"set": "cpp",
|
||||||
|
"__tree": "cpp",
|
||||||
|
"deque": "cpp",
|
||||||
|
"list": "cpp",
|
||||||
|
"map": "cpp",
|
||||||
|
"unordered_set": "cpp",
|
||||||
|
"any": "cpp",
|
||||||
|
"condition_variable": "cpp",
|
||||||
|
"forward_list": "cpp",
|
||||||
|
"fstream": "cpp",
|
||||||
|
"stack": "cpp",
|
||||||
|
"thread": "cpp",
|
||||||
|
"__memory": "cpp",
|
||||||
|
"filesystem": "cpp",
|
||||||
|
"*.toml": "toml",
|
||||||
|
"utility": "cpp",
|
||||||
|
"span": "cpp",
|
||||||
|
"*.tcc": "cpp",
|
||||||
|
"bit": "cpp",
|
||||||
|
"charconv": "cpp",
|
||||||
|
"cinttypes": "cpp",
|
||||||
|
"codecvt": "cpp",
|
||||||
|
"functional": "cpp",
|
||||||
|
"iterator": "cpp",
|
||||||
|
"memory_resource": "cpp",
|
||||||
|
"random": "cpp",
|
||||||
|
"source_location": "cpp",
|
||||||
|
"format": "cpp",
|
||||||
|
"numbers": "cpp",
|
||||||
|
"semaphore": "cpp",
|
||||||
|
"stop_token": "cpp",
|
||||||
|
"text_encoding": "cpp",
|
||||||
|
"typeindex": "cpp",
|
||||||
|
"valarray": "cpp"
|
||||||
|
}
|
||||||
}
|
}
|
138
BinDisc.cpp
138
BinDisc.cpp
@@ -1,138 +0,0 @@
|
|||||||
#include <algorithm>
|
|
||||||
#include <limits>
|
|
||||||
#include <cmath>
|
|
||||||
#include "BinDisc.h"
|
|
||||||
#include <iostream>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
namespace mdlp {
|
|
||||||
|
|
||||||
BinDisc::BinDisc(int n_bins, strategy_t strategy) : n_bins{ n_bins }, strategy{ strategy }
|
|
||||||
{
|
|
||||||
if (n_bins < 3) {
|
|
||||||
throw std::invalid_argument("n_bins must be greater than 2");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BinDisc::~BinDisc() = default;
|
|
||||||
// Compute the cut points for X according to the configured strategy.
// Any previously fitted cut points are discarded first. An empty X yields a
// single cut point equal to the largest representable precision_t value.
void BinDisc::fit(samples_t& X)
{
    cutPoints.clear();
    if (X.empty()) {
        // No data: keep only the sentinel upper bound
        cutPoints.push_back(std::numeric_limits<precision_t>::max());
        return;
    }
    switch (strategy) {
        case strategy_t::QUANTILE:
            fit_quantile(X);
            break;
        case strategy_t::UNIFORM:
            fit_uniform(X);
            break;
    }
}
|
|
||||||
// Return `num - 1` evenly spaced values starting at `start` with step
// (end - start) / (num - 1). The end point itself is never emitted.
//   start, end : limits of the range.
//   num        : number of points of the full range (end point included).
// Special cases:
//   - start == end returns the single value { 0 } (callers drop the first
//     element afterwards, so only its presence matters).
//   - num < 2 returns an empty vector: there is no step to compute.
std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
{
    // Doesn't include end point as it is not needed
    if (start == end) {
        return { 0 };
    }
    if (num < 2) {
        // Avoids the division by zero (num == 1) and the signed/unsigned
        // wrap-around of `num - 1` in the loop bound (num <= 0)
        return {};
    }
    const auto steps = static_cast<size_t>(num - 1);
    const precision_t delta = (end - start) / static_cast<precision_t>(steps);
    std::vector<precision_t> linspc;
    linspc.reserve(steps);
    for (size_t i = 0; i < steps; ++i) {
        linspc.push_back(start + delta * static_cast<precision_t>(i));
    }
    return linspc;
}
|
|
||||||
// Clamp n to the range [lower, upper].
// When lower > upper the lower bound wins, exactly as max(lower, min(n, upper)) does.
size_t clip(const size_t n, size_t lower, size_t upper)
{
    const size_t capped = n < upper ? n : upper;
    return capped < lower ? lower : capped;
}
|
|
||||||
// Compute the requested percentiles of `data` by linear interpolation between
// the two closest samples.
//   data        : samples, expected to be sorted in ascending order by the caller.
//   percentiles : percentiles to compute, expressed in the range [0, 100].
// Returns the interpolated values in the order requested, skipping any value
// equal to the one emitted just before it. Empty data returns an empty vector.
std::vector<precision_t> percentile(samples_t& data, std::vector<precision_t>& percentiles)
{
    // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
    std::vector<precision_t> results;
    if (data.empty()) {
        // data.size() - 1 would wrap around below
        return results;
    }
    results.reserve(percentiles.size());
    const size_t last = data.size() - 1;
    for (auto pct : percentiles) {
        precision_t value;
        if (last == 0) {
            // A single sample is every percentile; interpolating would divide by zero
            value = data.front();
        } else {
            const size_t i = static_cast<size_t>(std::floor(static_cast<double>(last) * pct / 100.));
            const auto indexLower = clip(i, 0, last);
            // At the 100th percentile there is no sample after indexLower; the
            // fraction is 0 there, so reusing the last sample keeps the result
            const auto indexUpper = std::min(indexLower + 1, last);
            const double percentI = static_cast<double>(indexLower) / static_cast<double>(last);
            const double fraction =
                (pct / 100.0 - percentI) /
                (static_cast<double>(indexLower + 1) / static_cast<double>(last) - percentI);
            // Narrow before comparing so the duplicate check uses the value actually stored
            value = static_cast<precision_t>(data[indexLower] + (data[indexUpper] - data[indexLower]) * fraction);
        }
        // results.back() is undefined on an empty vector, so the first value is always kept
        if (results.empty() || value != results.back())
            results.push_back(value);
    }
    return results;
}
|
|
||||||
// Place the cut points at the quantiles of X so the bins hold a similar number of samples.
// X itself is left untouched: the sorting is done on a copy.
// Constant data (or a single sample) produces only the sentinel cut point.
void BinDisc::fit_quantile(samples_t& X)
{
    samples_t sorted = X;
    std::sort(sorted.begin(), sorted.end());
    const bool isConstant = sorted.size() == 1 || sorted.front() == sorted.back();
    if (isConstant) {
        // if X is constant
        cutPoints.push_back(std::numeric_limits<precision_t>::max());
        return;
    }
    // n_bins + 1 percentile positions over [0, 100]; linspace leaves the last one out
    auto quantiles = linspace(0.0, 100.0, n_bins + 1);
    cutPoints = percentile(sorted, quantiles);
    normalizeCutPoints();
}
|
|
||||||
// Place the cut points evenly spaced between the smallest and the largest value of X.
void BinDisc::fit_uniform(samples_t& X)
{
    const auto extremes = std::minmax_element(X.begin(), X.end());
    const precision_t lowest = *extremes.first;
    const precision_t highest = *extremes.second;
    cutPoints = linspace(lowest, highest, n_bins + 1);
    normalizeCutPoints();
}
|
|
||||||
void BinDisc::normalizeCutPoints()
|
|
||||||
{
|
|
||||||
// Add max value to the end
|
|
||||||
cutPoints.push_back(std::numeric_limits<precision_t>::max());
|
|
||||||
// Remove first as it is not needed
|
|
||||||
cutPoints.erase(cutPoints.begin());
|
|
||||||
}
|
|
||||||
// Map every value of X to the position of the first cut point strictly greater than it.
// Returns a reference to an internal buffer that is overwritten on every call.
labels_t& BinDisc::transform(const samples_t& X)
{
    discretizedData.clear();
    discretizedData.reserve(X.size());
    const auto first = cutPoints.begin();
    const auto last = cutPoints.end();
    for (auto it = X.begin(); it != X.end(); ++it) {
        const precision_t& item = *it;
        const auto bin = std::upper_bound(first, last, item) - first;
        discretizedData.push_back(bin);
    }
    return discretizedData;
}
|
|
||||||
}
|
|
||||||
// void BinDisc::fit_quantile(samples_t& X)
|
|
||||||
// {
|
|
||||||
// cutPoints.clear();
|
|
||||||
// if (X.empty()) {
|
|
||||||
// cutPoints.push_back(std::numeric_limits<float>::max());
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
// samples_t data = X;
|
|
||||||
// std::sort(data.begin(), data.end());
|
|
||||||
// float min_val = data.front();
|
|
||||||
// float max_val = data.back();
|
|
||||||
// // Handle case of all data points having the same value
|
|
||||||
// if (min_val == max_val) {
|
|
||||||
// cutPoints.push_back(std::numeric_limits<float>::max());
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
// int first = X.size() / n_bins;
|
|
||||||
// cutPoints.push_back(data.at(first - 1));
|
|
||||||
// int bins_done = 1;
|
|
||||||
// int prev = first - 1;
|
|
||||||
// while (bins_done < n_bins) {
|
|
||||||
// int next = first * (bins_done + 1) - 1;
|
|
||||||
// while (next < X.size() && data.at(next) == data[prev]) {
|
|
||||||
// ++next;
|
|
||||||
// }
|
|
||||||
// if (next == X.size() || bins_done == n_bins - 1) {
|
|
||||||
// cutPoints.push_back(std::numeric_limits<float>::max());
|
|
||||||
// break;
|
|
||||||
// } else {
|
|
||||||
// cutPoints.push_back(data[next]);
|
|
||||||
// bins_done++;
|
|
||||||
// prev = next;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
31
BinDisc.h
31
BinDisc.h
@@ -1,31 +0,0 @@
|
|||||||
#ifndef BINDISC_H
|
|
||||||
#define BINDISC_H
|
|
||||||
|
|
||||||
#include "typesFImdlp.h"
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
namespace mdlp {
|
|
||||||
|
|
||||||
enum class strategy_t {
|
|
||||||
UNIFORM,
|
|
||||||
QUANTILE
|
|
||||||
};
|
|
||||||
class BinDisc {
|
|
||||||
public:
|
|
||||||
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
|
||||||
~BinDisc();
|
|
||||||
void fit(samples_t&);
|
|
||||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
|
||||||
labels_t& transform(const samples_t&);
|
|
||||||
static inline std::string version() { return "1.0.0"; };
|
|
||||||
private:
|
|
||||||
void fit_uniform(samples_t&);
|
|
||||||
void fit_quantile(samples_t&);
|
|
||||||
void normalizeCutPoints();
|
|
||||||
int n_bins;
|
|
||||||
strategy_t strategy;
|
|
||||||
labels_t discretizedData = labels_t();
|
|
||||||
cutPoints_t cutPoints;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
#endif
|
|
@@ -1,13 +1,44 @@
|
|||||||
cmake_minimum_required(VERSION 3.20)
|
cmake_minimum_required(VERSION 3.20)
|
||||||
project(mdlp)
|
|
||||||
|
|
||||||
if (POLICY CMP0135)
|
project(fimdlp)
|
||||||
cmake_policy(SET CMP0135 NEW)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
endif ()
|
cmake_policy(SET CMP0135 NEW)
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
find_package(Torch CONFIG REQUIRED)
|
||||||
|
|
||||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-elide-constructors")
|
||||||
add_subdirectory(sample)
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||||
add_subdirectory(tests)
|
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||||
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_TESTING)
|
||||||
|
MESSAGE("Debug mode")
|
||||||
|
enable_testing()
|
||||||
|
set(CODE_COVERAGE ON)
|
||||||
|
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||||
|
add_subdirectory(tests)
|
||||||
|
else(ENABLE_TESTING)
|
||||||
|
MESSAGE("Release mode")
|
||||||
|
endif(ENABLE_TESTING)
|
||||||
|
|
||||||
|
if (ENABLE_SAMPLE)
|
||||||
|
message("Building sample")
|
||||||
|
add_subdirectory(sample)
|
||||||
|
endif(ENABLE_SAMPLE)
|
||||||
|
|
||||||
|
include_directories(
|
||||||
|
${TORCH_INCLUDE_DIRS}
|
||||||
|
${fimdlp_SOURCE_DIR}/src
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(fimdlp src/CPPFImdlp.cpp src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp)
|
||||||
|
target_link_libraries(fimdlp "${TORCH_LIBRARIES}")
|
||||||
|
|
||||||
|
# Installation
|
||||||
|
# ------------
|
||||||
|
install(TARGETS fimdlp
|
||||||
|
ARCHIVE DESTINATION lib
|
||||||
|
LIBRARY DESTINATION lib
|
||||||
|
CONFIGURATIONS Release)
|
||||||
|
install(DIRECTORY src/ DESTINATION include/fimdlp FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
|
||||||
|
35
Makefile
Normal file
35
Makefile
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Convenience targets wrapping the CMake build.
#   make / make build : clean Release build (with the sample) in build_release
#   make install      : install the library from build_release (run `make build` first)
#   make test         : clean Debug build, run the tests, generate the coverage report
SHELL := /bin/bash
.DEFAULT_GOAL := build
# None of the targets produce a file with their own name; declare them all as
# phony so a stray file called build, install or test never hides a target.
.PHONY: build install test
lcov := lcov

build:
	@if [ -d build_release ]; then rm -fr build_release; fi
	@mkdir build_release
	@cmake -B build_release -S . -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF -DENABLE_SAMPLE=ON
	@cmake --build build_release -j 8

install:
	@cmake --build build_release --target install -j 8

test:
	@if [ -d build_debug ]; then rm -fr build_debug; fi
	@mkdir build_debug
	@cmake -B build_debug -S . -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON -DENABLE_SAMPLE=ON
	@cmake --build build_debug -j 8
	@cd build_debug/tests && ctest --output-on-failure -j 8
	# Capture the coverage data and strip system, third-party and test sources from it
	@cd build_debug/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --output-file coverage.info >/dev/null 2>&1; \
	$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
	$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
	$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
	$(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
	$(lcov) --remove coverage.info 'gtest/*' --output-file coverage.info >/dev/null 2>&1;
	@genhtml build_debug/tests/coverage.info --demangle-cpp --output-directory build_debug/tests/coverage --title "Discretizer mdlp Coverage Report" -s -k -f --legend
	@echo "* Coverage report is generated at build_debug/tests/coverage/index.html"
	@which python || (echo ">>> Please install python"; exit 1)
	@if [ ! -f build_debug/tests/coverage.info ]; then \
		echo ">>> No coverage.info file found!"; \
		exit 1; \
	fi
	@echo ">>> Updating coverage badge..."
	@env python update_coverage.py build_debug/tests
|
27
README.md
27
README.md
@@ -1,6 +1,7 @@
|
|||||||
[](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
[](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||||
|
[](html/index.html)
|
||||||
|
|
||||||
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
||||||
|
|
||||||
@@ -14,28 +15,32 @@ The implementation tries to mitigate the problem of different label values with
|
|||||||
Other features:
|
Other features:
|
||||||
|
|
||||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||||
- Intervals have to have more than two examples to be evaluated.
|
- Intervals have to have more than two examples to be evaluated (mdlp).
|
||||||
|
|
||||||
The algorithm returns the cut points for the variable.
|
- The algorithm returns the cut points for the variable.
|
||||||
|
|
||||||
|
- The transform method maps each value to the index of its interval, determined by the cut points in the following way:
|
||||||
|
|
||||||
|
cut[i - 1] <= x < cut[i]
|
||||||
|
|
||||||
|
using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) method
|
||||||
|
|
||||||
|
- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
|
||||||
|
|
||||||
## Sample
|
## Sample
|
||||||
|
|
||||||
To run the sample, just execute the following commands:
|
To run the sample, just execute the following commands:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd sample
|
make build
|
||||||
cmake -B build
|
build_release/sample/sample -f iris -m 2
|
||||||
cd build
|
build_release/sample/sample -h
|
||||||
make
|
|
||||||
./sample -f iris -m 2
|
|
||||||
./sample -h
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Test
|
## Test
|
||||||
|
|
||||||
To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
|
To run the tests and see coverage (llvm, lcov and genhtml have to be installed), execute the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd tests
|
make test
|
||||||
./test
|
|
||||||
```
|
```
|
||||||
|
21
sample/.vscode/launch.json
vendored
21
sample/.vscode/launch.json
vendored
@@ -1,21 +0,0 @@
|
|||||||
{
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "lldb puro",
|
|
||||||
"type": "cppdbg",
|
|
||||||
// "targetArchitecture": "arm64",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${workspaceRoot}/build/sample",
|
|
||||||
"args": [
|
|
||||||
"-f",
|
|
||||||
"iris"
|
|
||||||
],
|
|
||||||
"stopAtEntry": false,
|
|
||||||
"cwd": "${workspaceRoot}/build/",
|
|
||||||
"environment": [],
|
|
||||||
"externalConsole": false,
|
|
||||||
"MIMode": "lldb"
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
@@ -1,5 +1,11 @@
|
|||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
set(CMAKE_BUILD_TYPE Debug)
|
set(CMAKE_BUILD_TYPE Debug)
|
||||||
|
|
||||||
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
include_directories(
|
||||||
|
${fimdlp_SOURCE_DIR}/src
|
||||||
|
${fimdlp_SOURCE_DIR}/tests/lib/Files
|
||||||
|
)
|
||||||
|
|
||||||
|
add_executable(sample sample.cpp )
|
||||||
|
target_link_libraries(sample fimdlp "${TORCH_LIBRARIES}")
|
||||||
|
@@ -1,3 +1,9 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
@@ -5,13 +11,13 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "../CPPFImdlp.h"
|
#include <torch/torch.h>
|
||||||
#include "../tests/ArffFiles.h"
|
#include <ArffFiles.hpp>
|
||||||
|
#include "Discretizer.h"
|
||||||
|
#include "CPPFImdlp.h"
|
||||||
|
#include "BinDisc.h"
|
||||||
|
|
||||||
using namespace std;
|
const string PATH = "tests/datasets/";
|
||||||
using namespace mdlp;
|
|
||||||
|
|
||||||
const string PATH = "../../tests/datasets/";
|
|
||||||
|
|
||||||
/* print a description of all supported options */
|
/* print a description of all supported options */
|
||||||
void usage(const char* path)
|
void usage(const char* path)
|
||||||
@@ -20,17 +26,17 @@ void usage(const char* path)
|
|||||||
const char* basename = strrchr(path, '/');
|
const char* basename = strrchr(path, '/');
|
||||||
basename = basename ? basename + 1 : path;
|
basename = basename ? basename + 1 : path;
|
||||||
|
|
||||||
cout << "usage: " << basename << "[OPTION]" << endl;
|
std::cout << "usage: " << basename << "[OPTION]" << std::endl;
|
||||||
cout << " -h, --help\t\t Print this help and exit." << endl;
|
std::cout << " -h, --help\t\t Print this help and exit." << std::endl;
|
||||||
cout
|
std::cout
|
||||||
<< " -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
<< " -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
||||||
<< endl;
|
<< std::endl;
|
||||||
cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
|
std::cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << std::endl;
|
||||||
cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
|
std::cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << std::endl;
|
||||||
cout
|
std::cout
|
||||||
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
|
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
|
||||||
<< endl;
|
<< std::endl;
|
||||||
cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
|
std::cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
|
||||||
@@ -96,56 +102,79 @@ void process_file(const string& path, const string& file_name, bool class_last,
|
|||||||
file.load(path + file_name + ".arff", class_last);
|
file.load(path + file_name + ".arff", class_last);
|
||||||
const auto attributes = file.getAttributes();
|
const auto attributes = file.getAttributes();
|
||||||
const auto items = file.getSize();
|
const auto items = file.getSize();
|
||||||
cout << "Number of lines: " << items << endl;
|
std::cout << "Number of lines: " << items << std::endl;
|
||||||
cout << "Attributes: " << endl;
|
std::cout << "Attributes: " << std::endl;
|
||||||
for (auto attribute : attributes) {
|
for (auto attribute : attributes) {
|
||||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
std::cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << std::endl;
|
||||||
}
|
}
|
||||||
cout << "Class name: " << file.getClassName() << endl;
|
std::cout << "Class name: " << file.getClassName() << std::endl;
|
||||||
cout << "Class type: " << file.getClassType() << endl;
|
std::cout << "Class type: " << file.getClassType() << std::endl;
|
||||||
cout << "Data: " << endl;
|
std::cout << "Data: " << std::endl;
|
||||||
vector<samples_t>& X = file.getX();
|
std::vector<mdlp::samples_t>& X = file.getX();
|
||||||
labels_t& y = file.getY();
|
mdlp::labels_t& y = file.getY();
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
for (auto feature : X) {
|
for (auto feature : X) {
|
||||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
std::cout << fixed << setprecision(1) << feature[i] << " ";
|
||||||
}
|
}
|
||||||
cout << y[i] << endl;
|
std::cout << y[i] << std::endl;
|
||||||
}
|
}
|
||||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||||
size_t total = 0;
|
size_t total = 0;
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||||
cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
|
std::cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
|
||||||
test.fit(X[i], y);
|
test.fit(X[i], y);
|
||||||
auto cut_points = test.getCutPoints();
|
auto cut_points = test.getCutPoints();
|
||||||
for (auto item : cut_points) {
|
for (auto item : cut_points) {
|
||||||
cout << item;
|
std::cout << item;
|
||||||
if (item != cut_points.back())
|
if (item != cut_points.back())
|
||||||
cout << ", ";
|
std::cout << ", ";
|
||||||
}
|
}
|
||||||
total += test.getCutPoints().size();
|
total += test.getCutPoints().size();
|
||||||
cout << "]" << endl;
|
std::cout << "]" << std::endl;
|
||||||
cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
|
std::cout << "Min: " << *min_max.first << " Max: " << *min_max.second << std::endl;
|
||||||
cout << "--------------------------" << endl;
|
std::cout << "--------------------------" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "Total cut points ...: " << total << std::endl;
|
||||||
|
std::cout << "Total feature states: " << total + attributes.size() << std::endl;
|
||||||
|
std::cout << "Version ............: " << test.version() << std::endl;
|
||||||
|
std::cout << "Transformed data (vector)..: " << std::endl;
|
||||||
|
test.fit(X[0], y);
|
||||||
|
auto data = test.transform(X[0]);
|
||||||
|
for (int i = 130; i < 135; i++) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
|
||||||
|
}
|
||||||
|
auto Xt = torch::tensor(X[0], torch::kFloat32);
|
||||||
|
auto yt = torch::tensor(y, torch::kInt32);
|
||||||
|
//test.fit_t(Xt, yt);
|
||||||
|
auto result = test.fit_transform_t(Xt, yt);
|
||||||
|
std::cout << "Transformed data (torch)...: " << std::endl;
|
||||||
|
for (int i = 130; i < 135; i++) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<mdlp::precision_t>() << " " << result[i].item<int>() << std::endl;
|
||||||
|
}
|
||||||
|
auto disc = mdlp::BinDisc(3);
|
||||||
|
auto res_v = disc.fit_transform(X[0], y);
|
||||||
|
disc.fit_t(Xt, yt);
|
||||||
|
auto res_t = disc.transform_t(Xt);
|
||||||
|
std::cout << "Transformed data (BinDisc)...: " << std::endl;
|
||||||
|
for (int i = 130; i < 135; i++) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<mdlp::precision_t>() << " " << res_v[i] << " " << res_t[i].item<int>() << std::endl;
|
||||||
}
|
}
|
||||||
cout << "Total cut points ...: " << total << endl;
|
|
||||||
cout << "Total feature states: " << total + attributes.size() << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
||||||
float max_cutpoints)
|
float max_cutpoints)
|
||||||
{
|
{
|
||||||
cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
std::cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
||||||
<< max_cutpoints << endl << endl;
|
<< max_cutpoints << std::endl << std::endl;
|
||||||
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
||||||
printf("==================== ==== ==== ========\n");
|
printf("==================== ==== ==== ========\n");
|
||||||
for (const auto& dataset : datasets) {
|
for (const auto& dataset : datasets) {
|
||||||
ArffFiles file;
|
ArffFiles file;
|
||||||
file.load(path + dataset.first + ".arff", dataset.second);
|
file.load(path + dataset.first + ".arff", dataset.second);
|
||||||
auto attributes = file.getAttributes();
|
auto attributes = file.getAttributes();
|
||||||
vector<samples_t>& X = file.getX();
|
std::vector<mdlp::samples_t>& X = file.getX();
|
||||||
labels_t& y = file.getY();
|
mdlp::labels_t& y = file.getY();
|
||||||
size_t timing = 0;
|
size_t timing = 0;
|
||||||
size_t cut_points = 0;
|
size_t cut_points = 0;
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
@@ -163,7 +192,7 @@ void process_all_files(const map<string, bool>& datasets, const string& path, in
|
|||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
map<string, bool> datasets = {
|
std::map<std::string, bool> datasets = {
|
||||||
{"diabetes", true},
|
{"diabetes", true},
|
||||||
{"glass", true},
|
{"glass", true},
|
||||||
{"iris", true},
|
{"iris", true},
|
||||||
@@ -173,14 +202,14 @@ int main(int argc, char** argv)
|
|||||||
{"mfeat-factors", true},
|
{"mfeat-factors", true},
|
||||||
{"test", true}
|
{"test", true}
|
||||||
};
|
};
|
||||||
string file_name;
|
std::string file_name;
|
||||||
string path;
|
std::string path;
|
||||||
int max_depth;
|
int max_depth;
|
||||||
int min_length;
|
int min_length;
|
||||||
float max_cutpoints;
|
float max_cutpoints;
|
||||||
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
||||||
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
||||||
cout << "Invalid file name: " << file_name << endl;
|
std::cout << "Invalid file name: " << file_name << std::endl;
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@@ -188,10 +217,10 @@ int main(int argc, char** argv)
|
|||||||
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
||||||
else {
|
else {
|
||||||
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
||||||
cout << "File name ....: " << file_name << endl;
|
std::cout << "File name ....: " << file_name << std::endl;
|
||||||
cout << "Max depth ....: " << max_depth << endl;
|
std::cout << "Max depth ....: " << max_depth << std::endl;
|
||||||
cout << "Min length ...: " << min_length << endl;
|
std::cout << "Min length ...: " << min_length << std::endl;
|
||||||
cout << "Max cutpoints : " << max_cutpoints << endl;
|
std::cout << "Max cutpoints : " << max_cutpoints << std::endl;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
@@ -3,7 +3,7 @@ sonar.organization=rmontanana
|
|||||||
|
|
||||||
# This is the name and version displayed in the SonarCloud UI.
|
# This is the name and version displayed in the SonarCloud UI.
|
||||||
sonar.projectName=mdlp
|
sonar.projectName=mdlp
|
||||||
sonar.projectVersion=1.1.3
|
sonar.projectVersion=2.0.1
|
||||||
# sonar.test.exclusions=tests/**
|
# sonar.test.exclusions=tests/**
|
||||||
# sonar.tests=tests/
|
# sonar.tests=tests/
|
||||||
# sonar.coverage.exclusions=tests/**,sample/**
|
# sonar.coverage.exclusions=tests/**,sample/**
|
||||||
|
98
src/BinDisc.cpp
Normal file
98
src/BinDisc.cpp
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include "BinDisc.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
|
||||||
|
BinDisc::BinDisc(int n_bins, strategy_t strategy) :
|
||||||
|
Discretizer(), n_bins{ n_bins }, strategy{ strategy }
|
||||||
|
{
|
||||||
|
if (n_bins < 3) {
|
||||||
|
throw std::invalid_argument("n_bins must be greater than 2");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BinDisc::~BinDisc() = default;
|
||||||
|
// Compute the cut points for X according to the configured strategy.
// An empty X stores the placeholder pair {0, 0}: transform() skips the first
// and the last cut point, so no interior boundary remains.
void BinDisc::fit(samples_t& X)
{
    cutPoints.clear();
    if (X.empty()) {
        cutPoints.assign(2, static_cast<precision_t>(0.0));
        return;
    }
    switch (strategy) {
        case strategy_t::QUANTILE:
            direction = bound_dir_t::RIGHT;
            fit_quantile(X);
            break;
        case strategy_t::UNIFORM:
            direction = bound_dir_t::RIGHT;
            fit_uniform(X);
            break;
    }
}
|
||||||
|
// Discretizer-interface overload: the labels are accepted only so the
// signature matches the base class; they are never read.
void BinDisc::fit(samples_t& X, labels_t& y)
{
    (void)y;
    fit(X);
}
|
||||||
|
// Return `num` evenly spaced values covering [start, end], both ends included.
// Degenerate requests (start == end, or num < 2) return the pair {start, end},
// since the discretizers always need at least two cut points.
std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
{
    // num == 1 would divide by zero below and num < 0 would wrap around when
    // used as an unsigned loop bound, so both are handled as degenerate input.
    if (start == end || num < 2) {
        return { start, end };
    }
    const precision_t delta = (end - start) / static_cast<precision_t>(num - 1);
    std::vector<precision_t> linspc;
    linspc.reserve(static_cast<size_t>(num));
    for (int i = 0; i < num; ++i) {
        linspc.push_back(start + delta * static_cast<precision_t>(i));
    }
    // start + delta * (num - 1) can miss `end` by a rounding error; pin it so
    // the interval is covered exactly.
    linspc.back() = end;
    return linspc;
}
|
||||||
|
// Limit n to the interval [lower, upper]. The upper bound is applied first and
// the lower bound last, so `lower` wins if the bounds are inverted.
size_t clip(const size_t n, const size_t lower, const size_t upper)
{
    const size_t capped = upper < n ? upper : n;
    if (capped < lower) {
        return lower;
    }
    return capped;
}
|
||||||
|
// Linearly interpolated percentiles of `data`; `percentiles` are given on the
// 0-100 scale. Positions in `data` are used as ranks, so it is assumed to be
// sorted in ascending order (fit_quantile sorts before calling).
// A value equal to the previously stored result is dropped, so the output can
// be shorter than `percentiles`. With fewer than two samples there is nothing
// to interpolate: the result is empty for empty data (or no percentiles) and
// the single sample otherwise.
// Interpolation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
std::vector<precision_t> percentile(samples_t& data, const std::vector<precision_t>& percentiles)
{
    std::vector<precision_t> results;
    if (data.size() < 2) {
        // data.size() - 2 below would wrap around and index out of bounds
        if (!data.empty() && !percentiles.empty()) {
            results.push_back(data.front());
        }
        return results;
    }
    results.reserve(percentiles.size());
    const auto lastIndex = static_cast<precision_t>(data.size() - 1);
    for (const auto pct : percentiles) {
        const auto i = static_cast<size_t>(std::floor(lastIndex * pct / 100.));
        // Keep indexLower + 1 inside the data
        const auto indexLower = clip(i, 0, data.size() - 2);
        const precision_t percentI = static_cast<precision_t>(indexLower) / lastIndex;
        const precision_t fraction =
            (pct / 100.0 - percentI) /
            (static_cast<precision_t>(indexLower + 1) / lastIndex - percentI);
        const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction;
        // Check emptiness first: back() on an empty vector is undefined
        if (results.empty() || value != results.back()) {
            results.push_back(value);
        }
    }
    return results;
}
|
||||||
|
void BinDisc::fit_quantile(const samples_t& X)
|
||||||
|
{
|
||||||
|
auto quantiles = linspace(0.0, 100.0, n_bins + 1);
|
||||||
|
auto data = X;
|
||||||
|
std::sort(data.begin(), data.end());
|
||||||
|
if (data.front() == data.back() || data.size() == 1) {
|
||||||
|
// if X is constant, pass any two given points that shall be ignored in transform
|
||||||
|
cutPoints.push_back(data.front());
|
||||||
|
cutPoints.push_back(data.front());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
cutPoints = percentile(data, quantiles);
|
||||||
|
}
|
||||||
|
void BinDisc::fit_uniform(const samples_t& X)
|
||||||
|
{
|
||||||
|
auto [vmin, vmax] = std::minmax_element(X.begin(), X.end());
|
||||||
|
cutPoints = linspace(*vmin, *vmax, n_bins + 1);
|
||||||
|
}
|
||||||
|
}
|
33
src/BinDisc.h
Normal file
33
src/BinDisc.h
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
|
#ifndef BINDISC_H
|
||||||
|
#define BINDISC_H
|
||||||
|
|
||||||
|
#include "typesFImdlp.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
// Binning strategies understood by BinDisc.
enum class strategy_t {
    UNIFORM,  // cut points evenly spaced between the minimum and maximum value
    QUANTILE  // cut points taken at evenly spaced percentiles of the data
};
|
||||||
|
class BinDisc : public Discretizer {
|
||||||
|
public:
|
||||||
|
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
||||||
|
~BinDisc();
|
||||||
|
// y is included for compatibility with the Discretizer interface
|
||||||
|
void fit(samples_t& X_, labels_t& y) override;
|
||||||
|
void fit(samples_t& X);
|
||||||
|
private:
|
||||||
|
void fit_uniform(const samples_t&);
|
||||||
|
void fit_quantile(const samples_t&);
|
||||||
|
int n_bins;
|
||||||
|
strategy_t strategy;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -1,3 +1,9 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <set>
|
#include <set>
|
||||||
@@ -6,28 +12,27 @@
|
|||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
|
||||||
|
Discretizer(),
|
||||||
|
min_length(min_length_),
|
||||||
max_depth(max_depth_),
|
max_depth(max_depth_),
|
||||||
proposed_cuts(proposed)
|
proposed_cuts(proposed)
|
||||||
{
|
{
|
||||||
|
direction = bound_dir_t::RIGHT;
|
||||||
}
|
}
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp() = default;
|
|
||||||
|
|
||||||
CPPFImdlp::~CPPFImdlp() = default;
|
|
||||||
|
|
||||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||||
{
|
{
|
||||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||||
if (proposed_cuts == 0) {
|
if (proposed_cuts == 0) {
|
||||||
return numeric_limits<size_t>::max();
|
return numeric_limits<size_t>::max();
|
||||||
}
|
}
|
||||||
if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
|
if (proposed_cuts < 0 || proposed_cuts > static_cast<precision_t>(X.size())) {
|
||||||
throw invalid_argument("wrong proposed num_cuts value");
|
throw invalid_argument("wrong proposed num_cuts value");
|
||||||
}
|
}
|
||||||
if (proposed_cuts < 1)
|
if (proposed_cuts < 1)
|
||||||
return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
|
return static_cast<size_t>(round(static_cast<precision_t>(X.size()) * proposed_cuts));
|
||||||
return static_cast<size_t>(proposed_cuts);
|
return static_cast<size_t>(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||||
@@ -60,6 +65,10 @@ namespace mdlp {
|
|||||||
resizeCutPoints();
|
resizeCutPoints();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Insert the first & last X values into the cutpoints, as they shall be ignored in transform
|
||||||
|
auto [vmin, vmax] = std::minmax_element(X.begin(), X.end());
|
||||||
|
cutPoints.push_back(*vmax);
|
||||||
|
cutPoints.insert(cutPoints.begin(), *vmin);
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
||||||
@@ -208,14 +217,5 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||||
}
|
}
|
||||||
labels_t& CPPFImdlp::transform(const samples_t& data)
|
|
||||||
{
|
|
||||||
discretizedData.clear();
|
|
||||||
discretizedData.reserve(data.size());
|
|
||||||
for (const precision_t& item : data) {
|
|
||||||
auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
|
||||||
discretizedData.push_back(upper - cutPoints.begin());
|
|
||||||
}
|
|
||||||
return discretizedData;
|
|
||||||
}
|
|
||||||
}
|
}
|
@@ -1,3 +1,9 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#ifndef CPPFIMDLP_H
|
#ifndef CPPFIMDLP_H
|
||||||
#define CPPFIMDLP_H
|
#define CPPFIMDLP_H
|
||||||
|
|
||||||
@@ -6,18 +12,16 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class CPPFImdlp {
|
class CPPFImdlp : public Discretizer {
|
||||||
public:
|
public:
|
||||||
CPPFImdlp();
|
CPPFImdlp() = default;
|
||||||
CPPFImdlp(size_t, int, float);
|
CPPFImdlp(size_t min_length_, int max_depth_, float proposed);
|
||||||
~CPPFImdlp();
|
virtual ~CPPFImdlp() = default;
|
||||||
void fit(samples_t&, labels_t&);
|
void fit(samples_t& X_, labels_t& y_) override;
|
||||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
|
||||||
labels_t& transform(const samples_t&);
|
|
||||||
inline int get_depth() const { return depth; };
|
inline int get_depth() const { return depth; };
|
||||||
static inline std::string version() { return "1.1.3"; };
|
|
||||||
protected:
|
protected:
|
||||||
size_t min_length = 3;
|
size_t min_length = 3;
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
@@ -27,9 +31,7 @@ namespace mdlp {
|
|||||||
samples_t X = samples_t();
|
samples_t X = samples_t();
|
||||||
labels_t y = labels_t();
|
labels_t y = labels_t();
|
||||||
Metrics metrics = Metrics(y, indices);
|
Metrics metrics = Metrics(y, indices);
|
||||||
cutPoints_t cutPoints;
|
|
||||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||||
labels_t discretizedData = labels_t();
|
|
||||||
static indices_t sortIndices(samples_t&, labels_t&);
|
static indices_t sortIndices(samples_t&, labels_t&);
|
||||||
void computeCutPoints(size_t, size_t, int);
|
void computeCutPoints(size_t, size_t, int);
|
||||||
void resizeCutPoints();
|
void resizeCutPoints();
|
54
src/Discretizer.cpp
Normal file
54
src/Discretizer.cpp
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
|
#include "Discretizer.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
|
||||||
|
// Map every sample in `data` to a bin index, using the cut points stored by fit.
// Returns a reference to the internal buffer, which is overwritten by the next
// call to transform.
labels_t& Discretizer::transform(const samples_t& data)
{
    discretizedData.clear();
    discretizedData.reserve(data.size());
    // The first and last cut points are ignored, so only the interior ones are
    // searched. With fewer than two stored cut points (e.g. transform called
    // before fit) there is no interior: search an empty range instead of
    // forming iterators outside the vector.
    const bool hasBounds = cutPoints.size() >= 2;
    const auto first = hasBounds ? cutPoints.begin() + 1 : cutPoints.end();
    const auto last = hasBounds ? cutPoints.end() - 1 : cutPoints.end();
    // LEFT: a value equal to a cut point goes to the bin below it (lower_bound);
    // RIGHT: to the bin above it (upper_bound). The algorithms are called
    // directly because standard library functions must not have their address taken.
    const bool useLowerBound = direction == bound_dir_t::LEFT;
    for (const precision_t& item : data) {
        const auto pos = useLowerBound ? std::lower_bound(first, last, item)
                                       : std::upper_bound(first, last, item);
        discretizedData.push_back(static_cast<label_t>(pos - first));
    }
    return discretizedData;
}
|
||||||
|
// Fit the discretizer on (X_, y_) and immediately discretize X_ with the
// resulting cut points. The returned reference is the buffer owned by transform.
labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_)
{
    fit(X_, y_);
    labels_t& labels = transform(X_);
    return labels;
}
|
||||||
|
// Tensor front end of fit: copies the tensor contents into vectors and fits.
// X_ must hold precision_t values and y_ must hold int values (data_ptr checks
// the element type).
void Discretizer::fit_t(const torch::Tensor& X_, const torch::Tensor& y_)
{
    // data_ptr exposes the raw storage, which follows the logical element order
    // only for contiguous tensors; contiguous() returns the tensor itself when
    // that already holds. The locals keep the storage alive while it is copied.
    const auto Xc = X_.contiguous();
    const auto yc = y_.contiguous();
    const auto* x_begin = Xc.data_ptr<precision_t>();
    const auto* y_begin = yc.data_ptr<int>();
    samples_t X(x_begin, x_begin + Xc.numel());
    // y is sized from its own element count so a shorter label tensor is never
    // read past its end
    labels_t y(y_begin, y_begin + yc.numel());
    fit(X, y);
}
|
||||||
|
// Tensor front end of transform: discretizes the values of X_ and returns the
// labels as a new tensor of type torch_label_t. X_ must hold precision_t values.
torch::Tensor Discretizer::transform_t(const torch::Tensor& X_)
{
    // data_ptr follows the logical element order only for contiguous tensors
    const auto Xc = X_.contiguous();
    const auto* x_begin = Xc.data_ptr<precision_t>();
    samples_t X(x_begin, x_begin + Xc.numel());
    // Bind by reference: transform returns its internal buffer and
    // torch::tensor copies the data anyway
    const labels_t& result = transform(X);
    return torch::tensor(result, torch_label_t);
}
|
||||||
|
// Tensor front end of fit_transform: fits on (X_, y_) and returns the labels of
// X_ as a new tensor of type torch_label_t. X_ must hold precision_t values and
// y_ must hold int values (data_ptr checks the element type).
torch::Tensor Discretizer::fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_)
{
    // data_ptr follows the logical element order only for contiguous tensors;
    // the locals keep the storage alive while it is copied
    const auto Xc = X_.contiguous();
    const auto yc = y_.contiguous();
    const auto* x_begin = Xc.data_ptr<precision_t>();
    const auto* y_begin = yc.data_ptr<int>();
    samples_t X(x_begin, x_begin + Xc.numel());
    // y is sized from its own element count so a shorter label tensor is never
    // read past its end
    labels_t y(y_begin, y_begin + yc.numel());
    // Bind by reference: fit_transform returns the internal label buffer and
    // torch::tensor copies the data anyway
    const labels_t& result = fit_transform(X, y);
    return torch::tensor(result, torch_label_t);
}
|
||||||
|
}
|
39
src/Discretizer.h
Normal file
39
src/Discretizer.h
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
|
#ifndef DISCRETIZER_H
|
||||||
|
#define DISCRETIZER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "typesFImdlp.h"
|
||||||
|
#include <torch/torch.h>
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
// Search direction used by Discretizer::transform for a value that is equal to
// a cut point.
enum class bound_dir_t {
    LEFT,  // std::lower_bound: the value is placed in the bin below the cut point
    RIGHT  // std::upper_bound: the value is placed in the bin above the cut point
};
|
||||||
|
const auto torch_label_t = torch::kInt32;
|
||||||
|
class Discretizer {
|
||||||
|
public:
|
||||||
|
Discretizer() = default;
|
||||||
|
virtual ~Discretizer() = default;
|
||||||
|
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||||
|
virtual void fit(samples_t& X_, labels_t& y_) = 0;
|
||||||
|
labels_t& transform(const samples_t& data);
|
||||||
|
labels_t& fit_transform(samples_t& X_, labels_t& y_);
|
||||||
|
void fit_t(const torch::Tensor& X_, const torch::Tensor& y_);
|
||||||
|
torch::Tensor transform_t(const torch::Tensor& X_);
|
||||||
|
torch::Tensor fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_);
|
||||||
|
static inline std::string version() { return "2.0.1"; };
|
||||||
|
protected:
|
||||||
|
labels_t discretizedData = labels_t();
|
||||||
|
cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform
|
||||||
|
bound_dir_t direction; // used in transform
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -1,11 +1,17 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
|
Metrics::Metrics(labels_t& y_, indices_t& indices_) : y(y_), indices(indices_),
|
||||||
numClasses(computeNumClasses(0, indices.size()))
|
numClasses(computeNumClasses(0, indices_.size()))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
@@ -1,3 +1,9 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#ifndef CCMETRICS_H
|
#ifndef CCMETRICS_H
|
||||||
#define CCMETRICS_H
|
#define CCMETRICS_H
|
||||||
|
|
@@ -1,3 +1,9 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#ifndef TYPES_H
|
#ifndef TYPES_H
|
||||||
#define TYPES_H
|
#define TYPES_H
|
||||||
|
|
||||||
@@ -8,8 +14,9 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
typedef float precision_t;
|
typedef float precision_t;
|
||||||
|
typedef int label_t;
|
||||||
typedef std::vector<precision_t> samples_t;
|
typedef std::vector<precision_t> samples_t;
|
||||||
typedef std::vector<int> labels_t;
|
typedef std::vector<label_t> labels_t;
|
||||||
typedef std::vector<size_t> indices_t;
|
typedef std::vector<size_t> indices_t;
|
||||||
typedef std::vector<precision_t> cutPoints_t;
|
typedef std::vector<precision_t> cutPoints_t;
|
||||||
typedef std::map<std::pair<int, int>, precision_t> cacheEnt_t;
|
typedef std::map<std::pair<int, int>, precision_t> cacheEnt_t;
|
@@ -1,132 +0,0 @@
|
|||||||
#include "ArffFiles.h"
|
|
||||||
#include <fstream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
ArffFiles::ArffFiles() = default;
|
|
||||||
|
|
||||||
vector<string> ArffFiles::getLines() const
|
|
||||||
{
|
|
||||||
return lines;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned long int ArffFiles::getSize() const
|
|
||||||
{
|
|
||||||
return lines.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<pair<string, string>> ArffFiles::getAttributes() const
|
|
||||||
{
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
string ArffFiles::getClassName() const
|
|
||||||
{
|
|
||||||
return className;
|
|
||||||
}
|
|
||||||
|
|
||||||
string ArffFiles::getClassType() const
|
|
||||||
{
|
|
||||||
return classType;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<mdlp::samples_t>& ArffFiles::getX()
|
|
||||||
{
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<int>& ArffFiles::getY()
|
|
||||||
{
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ArffFiles::load(const string& fileName, bool classLast)
|
|
||||||
{
|
|
||||||
ifstream file(fileName);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
throw invalid_argument("Unable to open file");
|
|
||||||
}
|
|
||||||
string line;
|
|
||||||
string keyword;
|
|
||||||
string attribute;
|
|
||||||
string type;
|
|
||||||
string type_w;
|
|
||||||
while (getline(file, line)) {
|
|
||||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
|
||||||
stringstream ss(line);
|
|
||||||
ss >> keyword >> attribute;
|
|
||||||
type = "";
|
|
||||||
while (ss >> type_w)
|
|
||||||
type += type_w + " ";
|
|
||||||
attributes.emplace_back(trim(attribute), trim(type));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line[0] == '@') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
lines.push_back(line);
|
|
||||||
}
|
|
||||||
file.close();
|
|
||||||
if (attributes.empty())
|
|
||||||
throw invalid_argument("No attributes found");
|
|
||||||
if (classLast) {
|
|
||||||
className = get<0>(attributes.back());
|
|
||||||
classType = get<1>(attributes.back());
|
|
||||||
attributes.pop_back();
|
|
||||||
} else {
|
|
||||||
className = get<0>(attributes.front());
|
|
||||||
classType = get<1>(attributes.front());
|
|
||||||
attributes.erase(attributes.begin());
|
|
||||||
}
|
|
||||||
generateDataset(classLast);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void ArffFiles::generateDataset(bool classLast)
|
|
||||||
{
|
|
||||||
X = vector<mdlp::samples_t>(attributes.size(), mdlp::samples_t(lines.size()));
|
|
||||||
auto yy = vector<string>(lines.size(), "");
|
|
||||||
int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
|
|
||||||
for (size_t i = 0; i < lines.size(); i++) {
|
|
||||||
stringstream ss(lines[i]);
|
|
||||||
string value;
|
|
||||||
int pos = 0;
|
|
||||||
int xIndex = 0;
|
|
||||||
while (getline(ss, value, ',')) {
|
|
||||||
if (pos++ == labelIndex) {
|
|
||||||
yy[i] = value;
|
|
||||||
} else {
|
|
||||||
X[xIndex++][i] = stof(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
y = factorize(yy);
|
|
||||||
}
|
|
||||||
|
|
||||||
string ArffFiles::trim(const string& source)
|
|
||||||
{
|
|
||||||
string s(source);
|
|
||||||
s.erase(0, s.find_first_not_of(" '\n\r\t"));
|
|
||||||
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
|
|
||||||
{
|
|
||||||
vector<int> yy;
|
|
||||||
yy.reserve(labels_t.size());
|
|
||||||
map<string, int> labelMap;
|
|
||||||
int i = 0;
|
|
||||||
for (const string& label : labels_t) {
|
|
||||||
if (labelMap.find(label) == labelMap.end()) {
|
|
||||||
labelMap[label] = i++;
|
|
||||||
}
|
|
||||||
yy.push_back(labelMap[label]);
|
|
||||||
}
|
|
||||||
return yy;
|
|
||||||
}
|
|
@@ -1,35 +0,0 @@
|
|||||||
#ifndef ARFFFILES_H
|
|
||||||
#define ARFFFILES_H
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include "../typesFImdlp.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
class ArffFiles {
|
|
||||||
private:
|
|
||||||
vector<string> lines;
|
|
||||||
vector<pair<string, string>> attributes;
|
|
||||||
string className;
|
|
||||||
string classType;
|
|
||||||
vector<mdlp::samples_t> X;
|
|
||||||
vector<int> y;
|
|
||||||
|
|
||||||
void generateDataset(bool);
|
|
||||||
|
|
||||||
public:
|
|
||||||
ArffFiles();
|
|
||||||
void load(const string&, bool = true);
|
|
||||||
vector<string> getLines() const;
|
|
||||||
unsigned long int getSize() const;
|
|
||||||
string getClassName() const;
|
|
||||||
string getClassType() const;
|
|
||||||
static string trim(const string&);
|
|
||||||
vector<mdlp::samples_t>& getX();
|
|
||||||
vector<int>& getY();
|
|
||||||
vector<pair<string, string>> getAttributes() const;
|
|
||||||
static vector<int> factorize(const vector<string>& labels_t);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1,9 +1,16 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include "ArffFiles.h"
|
#include <ArffFiles.hpp>
|
||||||
#include "../BinDisc.h"
|
#include "BinDisc.h"
|
||||||
|
#include "Experiments.hpp"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
const float margin = 1e-4;
|
const float margin = 1e-4;
|
||||||
@@ -37,12 +44,14 @@ namespace mdlp {
|
|||||||
TEST_F(TestBinDisc3U, Easy3BinsUniform)
|
TEST_F(TestBinDisc3U, Easy3BinsUniform)
|
||||||
{
|
{
|
||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
||||||
fit(X);
|
auto y = labels_t();
|
||||||
|
fit(X, y);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_NEAR(3.66667, cuts[0], margin);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_NEAR(6.33333, cuts[1], margin);
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(3.66667, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(6.33333, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(9.0, cuts.at(3), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -52,10 +61,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_NEAR(3.666667, cuts[0], margin);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_NEAR(6.333333, cuts[1], margin);
|
EXPECT_NEAR(1, cuts[0], margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(3.666667, cuts[1], margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(6.333333, cuts[2], margin);
|
||||||
|
EXPECT_NEAR(9, cuts[3], margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -65,10 +75,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.0, cuts[0]);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(7.0, cuts[1]);
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(4.0, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(7.0, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(10.0, cuts.at(3), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -78,10 +89,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4, cuts[0]);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_EQ(7, cuts[1]);
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(4.0, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(7.0, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(10.0, cuts.at(3), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -91,10 +103,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_NEAR(4.33333, cuts[0], margin);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_NEAR(7.66667, cuts[1], margin);
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(4.33333, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(7.66667, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(11.0, cuts.at(3), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -104,10 +117,11 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_NEAR(4.33333, cuts[0], margin);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_NEAR(7.66667, cuts[1], margin);
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(4.33333, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(7.66667, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(11.0, cuts.at(3), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -117,8 +131,9 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
ASSERT_EQ(2, cuts.size());
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
|
EXPECT_NEAR(1, cuts.at(1), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -128,8 +143,9 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
ASSERT_EQ(2, cuts.size());
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
|
EXPECT_NEAR(1, cuts.at(1), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
labels_t expected = { 0, 0, 0, 0, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -139,16 +155,18 @@ namespace mdlp {
|
|||||||
samples_t X = {};
|
samples_t X = {};
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
ASSERT_EQ(2, cuts.size());
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_NEAR(0, cuts.at(0), margin);
|
||||||
|
EXPECT_NEAR(0, cuts.at(1), margin);
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc3Q, EmptyQuantile)
|
TEST_F(TestBinDisc3Q, EmptyQuantile)
|
||||||
{
|
{
|
||||||
samples_t X = {};
|
samples_t X = {};
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
|
ASSERT_EQ(2, cuts.size());
|
||||||
EXPECT_EQ(1, cuts.size());
|
EXPECT_NEAR(0, cuts.at(0), margin);
|
||||||
|
EXPECT_NEAR(0, cuts.at(1), margin);
|
||||||
}
|
}
|
||||||
TEST(TestBinDisc3, ExceptionNumberBins)
|
TEST(TestBinDisc3, ExceptionNumberBins)
|
||||||
{
|
{
|
||||||
@@ -159,44 +177,41 @@ namespace mdlp {
|
|||||||
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_NEAR(1.66667, cuts[0], margin);
|
ASSERT_EQ(4, cuts.size());
|
||||||
EXPECT_NEAR(2.33333, cuts[1], margin);
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(1.66667, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(2.33333, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(3.0, cuts.at(3), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
|
labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
EXPECT_EQ(3.0, X[0]); // X is not modified
|
ASSERT_EQ(3.0, X[0]); // X is not modified
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc3Q, EasyRepeated)
|
TEST_F(TestBinDisc3Q, EasyRepeated)
|
||||||
{
|
{
|
||||||
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
std::cout << "cuts: ";
|
ASSERT_EQ(3, cuts.size());
|
||||||
for (auto cut : cuts) {
|
EXPECT_NEAR(1, cuts.at(0), margin);
|
||||||
std::cout << cut << " ";
|
EXPECT_NEAR(1.66667, cuts.at(1), margin);
|
||||||
}
|
EXPECT_NEAR(3.0, cuts.at(2), margin);
|
||||||
std::cout << std::endl;
|
|
||||||
std::cout << std::string(80, '-') << std::endl;
|
|
||||||
EXPECT_NEAR(1.66667, cuts[0], margin);
|
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[1]);
|
|
||||||
EXPECT_EQ(2, cuts.size());
|
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
|
labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
EXPECT_EQ(3.0, X[0]); // X is not modified
|
ASSERT_EQ(3.0, X[0]); // X is not modified
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc4U, Easy4BinsUniform)
|
TEST_F(TestBinDisc4U, Easy4BinsUniform)
|
||||||
{
|
{
|
||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(3.75, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(6.5, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(9.25, cuts[2]);
|
EXPECT_NEAR(3.75, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(6.5, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(9.25, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(12.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -206,11 +221,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(3.75, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(6.5, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(9.25, cuts[2]);
|
EXPECT_NEAR(3.75, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(6.5, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(9.25, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(12.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -220,11 +236,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.0, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(7.0, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(10.0, cuts[2]);
|
EXPECT_NEAR(4.0, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(7.0, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(10.0, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(13.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -234,11 +251,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.0, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(7.0, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(10.0, cuts[2]);
|
EXPECT_NEAR(4.0, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(7.0, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(10.0, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(13.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -248,11 +266,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.25, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(7.5, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(10.75, cuts[2]);
|
EXPECT_NEAR(4.25, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(7.5, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(10.75, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(14.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -262,11 +281,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.25, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(7.5, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(10.75, cuts[2]);
|
EXPECT_NEAR(4.25, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(7.5, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(10.75, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(14.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -276,11 +296,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.5, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(8, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(11.5, cuts[2]);
|
EXPECT_NEAR(4.5, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(8, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(11.5, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(15.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
|
labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -290,11 +311,12 @@ namespace mdlp {
|
|||||||
samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(4.5, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(8, cuts[1]);
|
EXPECT_NEAR(1.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(11.5, cuts[2]);
|
EXPECT_NEAR(4.5, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(8, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(11.5, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(15.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
|
labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -305,11 +327,12 @@ namespace mdlp {
|
|||||||
// 0 1 2 3 4 5 6 7 8 9
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(1.0, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(2.0, cuts[1]);
|
EXPECT_NEAR(0.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(3.0, cuts[2]);
|
EXPECT_NEAR(1.0, cuts.at(1), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
|
EXPECT_NEAR(2.0, cuts.at(2), margin);
|
||||||
EXPECT_EQ(4, cuts.size());
|
EXPECT_NEAR(3.0, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(4.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
|
labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
@@ -320,32 +343,69 @@ namespace mdlp {
|
|||||||
// 0 1 2 3 4 5 6 7 8 9
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
fit(X);
|
fit(X);
|
||||||
auto cuts = getCutPoints();
|
auto cuts = getCutPoints();
|
||||||
EXPECT_EQ(2.0, cuts[0]);
|
ASSERT_EQ(5, cuts.size());
|
||||||
EXPECT_EQ(3.0, cuts[1]);
|
EXPECT_NEAR(0.0, cuts.at(0), margin);
|
||||||
EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
|
EXPECT_NEAR(1.0, cuts.at(1), margin);
|
||||||
EXPECT_EQ(3, cuts.size());
|
EXPECT_NEAR(2.0, cuts.at(2), margin);
|
||||||
|
EXPECT_NEAR(3.0, cuts.at(3), margin);
|
||||||
|
EXPECT_NEAR(4.0, cuts.at(4), margin);
|
||||||
auto labels = transform(X);
|
auto labels = transform(X);
|
||||||
labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 };
|
labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
|
||||||
EXPECT_EQ(expected, labels);
|
EXPECT_EQ(expected, labels);
|
||||||
}
|
}
|
||||||
TEST_F(TestBinDisc4U, irisUniform)
|
TEST(TestBinDiscGeneric, Fileset)
|
||||||
{
|
{
|
||||||
ArffFiles file;
|
Experiments exps(data_path + "tests.txt");
|
||||||
file.load(data_path + "iris.arff", true);
|
int num = 0;
|
||||||
vector<samples_t>& X = file.getX();
|
while (exps.is_next()) {
|
||||||
fit(X[0]);
|
++num;
|
||||||
auto Xt = transform(X[0]);
|
Experiment exp = exps.next();
|
||||||
labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
|
BinDisc disc(exp.n_bins_, exp.strategy_[0] == 'Q' ? strategy_t::QUANTILE : strategy_t::UNIFORM);
|
||||||
EXPECT_EQ(expected, Xt);
|
std::vector<precision_t> test;
|
||||||
}
|
if (exp.type_ == experiment_t::RANGE) {
|
||||||
TEST_F(TestBinDisc4Q, irisQuantile)
|
for (float i = exp.from_; i < exp.to_; i += exp.step_) {
|
||||||
{
|
test.push_back(i);
|
||||||
ArffFiles file;
|
}
|
||||||
file.load(data_path + "iris.arff", true);
|
} else {
|
||||||
vector<samples_t>& X = file.getX();
|
test = exp.dataset_;
|
||||||
fit(X[0]);
|
}
|
||||||
auto Xt = transform(X[0]);
|
// show_vector(test, "Test");
|
||||||
labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
auto empty = std::vector<int>();
|
||||||
EXPECT_EQ(expected, Xt);
|
auto Xt = disc.fit_transform(test, empty);
|
||||||
|
auto cuts = disc.getCutPoints();
|
||||||
|
EXPECT_EQ(exp.discretized_data_.size(), Xt.size());
|
||||||
|
auto flag = false;
|
||||||
|
size_t n_errors = 0;
|
||||||
|
if (num < 40) {
|
||||||
|
//
|
||||||
|
// Check discretization of only the first 40 tests as after we cannot ensure the same codification due to precision problems
|
||||||
|
//
|
||||||
|
for (int i = 0; i < exp.discretized_data_.size(); ++i) {
|
||||||
|
if (exp.discretized_data_.at(i) != Xt.at(i)) {
|
||||||
|
if (!flag) {
|
||||||
|
if (exp.type_ == experiment_t::RANGE)
|
||||||
|
std::cout << "+Exp #: " << num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl;
|
||||||
|
else {
|
||||||
|
std::cout << "+Exp #: " << num << " strategy: " << exp.strategy_ << " " << " n_bins: " << exp.n_bins_ << " ";
|
||||||
|
show_vector(exp.dataset_, "Dataset");
|
||||||
|
}
|
||||||
|
show_vector(cuts, "Cuts");
|
||||||
|
std::cout << "Error at " << i << " test[i]=" << test.at(i) << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl;
|
||||||
|
flag = true;
|
||||||
|
EXPECT_EQ(exp.discretized_data_.at(i), Xt.at(i));
|
||||||
|
}
|
||||||
|
n_errors++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (flag) {
|
||||||
|
std::cout << "*** Found " << n_errors << " mistakes in this experiment dataset" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPECT_EQ(exp.cutpoints_.size(), cuts.size());
|
||||||
|
for (int i = 0; i < exp.cutpoints_.size(); ++i) {
|
||||||
|
EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "* Number of experiments tested: " << num << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,10 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.20)
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
|
|
||||||
include_directories(${GTEST_INCLUDE_DIRS})
|
include_directories(${GTEST_INCLUDE_DIRS})
|
||||||
|
|
||||||
|
|
||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
googletest
|
googletest
|
||||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||||
@@ -13,22 +8,37 @@ FetchContent_Declare(
|
|||||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||||
FetchContent_MakeAvailable(googletest)
|
FetchContent_MakeAvailable(googletest)
|
||||||
|
|
||||||
enable_testing()
|
include_directories(
|
||||||
|
${TORCH_INCLUDE_DIRS}
|
||||||
|
${fimdlp_SOURCE_DIR}/src
|
||||||
|
${fimdlp_SOURCE_DIR}/tests/lib/Files
|
||||||
|
)
|
||||||
|
|
||||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
add_executable(Metrics_unittest ${fimdlp_SOURCE_DIR}/src/Metrics.cpp Metrics_unittest.cpp)
|
||||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
|
||||||
add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp)
|
|
||||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
|
||||||
target_link_libraries(BinDisc_unittest GTest::gtest_main)
|
|
||||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
|
||||||
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
|
||||||
target_link_options(Metrics_unittest PRIVATE --coverage)
|
target_link_options(Metrics_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(FImdlp_unittest FImdlp_unittest.cpp
|
||||||
|
${fimdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${fimdlp_SOURCE_DIR}/src/Metrics.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp)
|
||||||
|
target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||||
|
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||||
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(BinDisc_unittest BinDisc_unittest.cpp ${fimdlp_SOURCE_DIR}/src/BinDisc.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp)
|
||||||
|
target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||||
|
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
||||||
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
|
add_executable(Discretizer_unittest Discretizer_unittest.cpp
|
||||||
|
${fimdlp_SOURCE_DIR}/src/BinDisc.cpp ${fimdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${fimdlp_SOURCE_DIR}/src/Metrics.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp )
|
||||||
|
target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||||
|
target_compile_options(Discretizer_unittest PRIVATE --coverage)
|
||||||
|
target_link_options(Discretizer_unittest PRIVATE --coverage)
|
||||||
|
|
||||||
include(GoogleTest)
|
include(GoogleTest)
|
||||||
|
|
||||||
gtest_discover_tests(Metrics_unittest)
|
gtest_discover_tests(Metrics_unittest)
|
||||||
gtest_discover_tests(FImdlp_unittest)
|
gtest_discover_tests(FImdlp_unittest)
|
||||||
gtest_discover_tests(BinDisc_unittest)
|
gtest_discover_tests(BinDisc_unittest)
|
||||||
|
gtest_discover_tests(Discretizer_unittest)
|
274
tests/Discretizer_unittest.cpp
Normal file
274
tests/Discretizer_unittest.cpp
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include <ArffFiles.hpp>
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "Discretizer.h"
|
||||||
|
#include "BinDisc.h"
|
||||||
|
#include "CPPFImdlp.h"
|
||||||
|
|
||||||
|
namespace mdlp {
|
||||||
|
const float margin = 1e-4;
|
||||||
|
static std::string set_data_path()
|
||||||
|
{
|
||||||
|
std::string path = "../datasets/";
|
||||||
|
std::ifstream file(path + "iris.arff");
|
||||||
|
if (file.is_open()) {
|
||||||
|
file.close();
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
return "../../tests/datasets/";
|
||||||
|
}
|
||||||
|
const std::string data_path = set_data_path();
|
||||||
|
const labels_t iris_quantile = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
||||||
|
TEST(Discretizer, Version)
|
||||||
|
{
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
|
||||||
|
auto version = disc->version();
|
||||||
|
delete disc;
|
||||||
|
std::cout << "Version computed: " << version;
|
||||||
|
EXPECT_EQ("2.0.1", version);
|
||||||
|
}
|
||||||
|
TEST(Discretizer, BinIrisUniform)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
auto y = labels_t();
|
||||||
|
disc->fit(X[0], y);
|
||||||
|
auto Xt = disc->transform(X[0]);
|
||||||
|
labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(expected, Xt);
|
||||||
|
}
|
||||||
|
TEST(Discretizer, BinIrisQuantile)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE);
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
auto y = labels_t();
|
||||||
|
disc->fit(X[0], y);
|
||||||
|
auto Xt = disc->transform(X[0]);
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(iris_quantile, Xt);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Discretizer, BinIrisQuantileTorch)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE);
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
auto X = file.getX();
|
||||||
|
auto y = file.getY();
|
||||||
|
auto X_torch = torch::tensor(X[0], torch::kFloat32);
|
||||||
|
auto yt = torch::tensor(y, torch::kInt32);
|
||||||
|
disc->fit_t(X_torch, yt);
|
||||||
|
torch::Tensor Xt = disc->transform_t(X_torch);
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(iris_quantile.size(), Xt.size(0));
|
||||||
|
for (int i = 0; i < iris_quantile.size(); ++i) {
|
||||||
|
EXPECT_EQ(iris_quantile.at(i), Xt[i].item<int>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST(Discretizer, BinIrisQuantileTorchFit_transform)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE);
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
auto X = file.getX();
|
||||||
|
auto y = file.getY();
|
||||||
|
auto X_torch = torch::tensor(X[0], torch::kFloat32);
|
||||||
|
auto yt = torch::tensor(y, torch::kInt32);
|
||||||
|
torch::Tensor Xt = disc->fit_transform_t(X_torch, yt);
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(iris_quantile.size(), Xt.size(0));
|
||||||
|
for (int i = 0; i < iris_quantile.size(); ++i) {
|
||||||
|
EXPECT_EQ(iris_quantile.at(i), Xt[i].item<int>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Discretizer, FImdlpIris)
|
||||||
|
{
|
||||||
|
auto labelsq = {
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
2,
|
||||||
|
};
|
||||||
|
labels_t expected = {
|
||||||
|
5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||||
|
5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
|
||||||
|
5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
|
||||||
|
3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
|
||||||
|
3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
|
||||||
|
4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
|
||||||
|
1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
|
||||||
|
};
|
||||||
|
ArffFiles file;
|
||||||
|
Discretizer* disc = new CPPFImdlp();
|
||||||
|
file.load(data_path + "iris.arff", true);
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
labels_t& y = file.getY();
|
||||||
|
disc->fit(X[1], y);
|
||||||
|
auto computed = disc->transform(X[1]);
|
||||||
|
delete disc;
|
||||||
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
|
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||||
|
EXPECT_EQ(computed[i], expected[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
139
tests/Experiments.hpp
Normal file
139
tests/Experiments.hpp
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
|
#ifndef EXPERIMENTS_HPP
|
||||||
|
#define EXPERIMENTS_HPP
|
||||||
|
#include<fstream>
#include<iostream>
#include<sstream>
#include<stdexcept>
#include<string>
#include<tuple>
#include<utility>
#include<vector>
#include "typesFImdlp.h"
|
||||||
|
|
||||||
|
// Debug helper: writes `title`, a colon, and then every element of `data`
// separated by ", " on one line of standard output, ending with std::endl
// (newline plus flush). An empty vector prints just "title: ".
template <typename T>
void show_vector(const std::vector<T>& data, std::string title)
{
    std::cout << title << ": ";
    for (auto it = data.begin(); it != data.end(); ++it) {
        // The separator goes in front of every element except the first one.
        if (it != data.begin()) {
            std::cout << ", ";
        }
        std::cout << *it;
    }
    std::cout << std::endl;
}
|
||||||
|
// Kind of input an experiment carries:
//   RANGE  - the input is described by a (from, to, step) triple
//   VECTOR - the input is an explicit list of values
enum class experiment_t { RANGE, VECTOR };
|
||||||
|
class Experiment {
|
||||||
|
public:
|
||||||
|
Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<mdlp::precision_t> cutpoints) :
|
||||||
|
from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::RANGE }
|
||||||
|
{
|
||||||
|
validate_strategy();
|
||||||
|
|
||||||
|
}
|
||||||
|
Experiment(std::vector<mdlp::precision_t> dataset, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<mdlp::precision_t> cutpoints) :
|
||||||
|
n_bins_{ n_bins }, strategy_{ strategy }, dataset_{ dataset }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::VECTOR }
|
||||||
|
{
|
||||||
|
validate_strategy();
|
||||||
|
}
|
||||||
|
void validate_strategy()
|
||||||
|
{
|
||||||
|
if (strategy_ != "Q" && strategy_ != "U") {
|
||||||
|
throw std::invalid_argument("Invalid strategy " + strategy_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
float from_;
|
||||||
|
float to_;
|
||||||
|
float step_;
|
||||||
|
int n_bins_;
|
||||||
|
std::string strategy_;
|
||||||
|
std::vector<mdlp::precision_t> dataset_;
|
||||||
|
std::vector<int> discretized_data_;
|
||||||
|
std::vector<mdlp::precision_t> cutpoints_;
|
||||||
|
experiment_t type_;
|
||||||
|
};
|
||||||
|
class Experiments {
|
||||||
|
public:
|
||||||
|
Experiments(const std::string filename) : filename{ filename }
|
||||||
|
{
|
||||||
|
test_file.open(filename);
|
||||||
|
if (!test_file.is_open()) {
|
||||||
|
throw std::runtime_error("File " + filename + " not found");
|
||||||
|
}
|
||||||
|
exp_end = false;
|
||||||
|
}
|
||||||
|
~Experiments()
|
||||||
|
{
|
||||||
|
test_file.close();
|
||||||
|
}
|
||||||
|
bool end() const
|
||||||
|
{
|
||||||
|
return exp_end;
|
||||||
|
}
|
||||||
|
bool is_next()
|
||||||
|
{
|
||||||
|
while (std::getline(test_file, line) && line[0] == '#');
|
||||||
|
if (test_file.eof()) {
|
||||||
|
exp_end = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
Experiment next()
|
||||||
|
{
|
||||||
|
return parse_experiment(line);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::tuple<float, float, float, int, std::string> parse_header(const std::string& line)
|
||||||
|
{
|
||||||
|
std::istringstream iss(line);
|
||||||
|
std::string from_, to_, step_, n_bins, strategy;
|
||||||
|
iss >> from_ >> to_ >> step_ >> n_bins >> strategy;
|
||||||
|
return { std::stof(from_), std::stof(to_), std::stof(step_), std::stoi(n_bins), strategy };
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
std::vector<T> parse_vector(const std::string& line)
|
||||||
|
{
|
||||||
|
std::istringstream iss(line);
|
||||||
|
std::vector<T> data;
|
||||||
|
std::string d;
|
||||||
|
while (iss >> d) {
|
||||||
|
data.push_back(std::is_same<T, float>::value ? std::stof(d) : std::stoi(d));
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
Experiment parse_experiment(std::string& line)
|
||||||
|
{
|
||||||
|
// Read experiment lines
|
||||||
|
std::string experiment, data, cuts, strategy;
|
||||||
|
std::getline(test_file, experiment);
|
||||||
|
std::getline(test_file, data);
|
||||||
|
std::getline(test_file, cuts);
|
||||||
|
// split data into variables
|
||||||
|
float from_, to_, step_;
|
||||||
|
int n_bins;
|
||||||
|
std::vector<mdlp::precision_t> dataset;
|
||||||
|
auto data_discretized = parse_vector<int>(data);
|
||||||
|
auto cutpoints = parse_vector<mdlp::precision_t>(cuts);
|
||||||
|
if (line == "RANGE") {
|
||||||
|
tie(from_, to_, step_, n_bins, strategy) = parse_header(experiment);
|
||||||
|
return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints };
|
||||||
|
}
|
||||||
|
strategy = experiment.substr(0, 1);
|
||||||
|
n_bins = std::stoi(experiment.substr(1, 1));
|
||||||
|
data = experiment.substr(3, experiment.size() - 4);
|
||||||
|
dataset = parse_vector<mdlp::precision_t>(data);
|
||||||
|
return Experiment(dataset, n_bins, strategy, data_discretized, cutpoints);
|
||||||
|
}
|
||||||
|
std::ifstream test_file;
|
||||||
|
std::string filename;
|
||||||
|
std::string line;
|
||||||
|
bool exp_end;
|
||||||
|
};
|
||||||
|
#endif
|
@@ -1,9 +1,15 @@
|
|||||||
#include "gtest/gtest.h"
|
// ****************************************************************
|
||||||
#include "../Metrics.h"
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
#include "../CPPFImdlp.h"
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "ArffFiles.h"
|
#include <ArffFiles.hpp>
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "Metrics.h"
|
||||||
|
#include "CPPFImdlp.h"
|
||||||
|
|
||||||
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
|
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
|
||||||
try { \
|
try { \
|
||||||
@@ -124,7 +130,7 @@ namespace mdlp {
|
|||||||
{
|
{
|
||||||
samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
|
samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
|
||||||
labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
|
labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
|
||||||
cutPoints_t expected = { 1.5f, 2.5f };
|
cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 };
|
||||||
fit(X_, y_);
|
fit(X_, y_);
|
||||||
auto computed = getCutPoints();
|
auto computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), expected.size());
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
@@ -167,29 +173,31 @@ namespace mdlp {
|
|||||||
y = { 1 };
|
y = { 1 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 0);
|
EXPECT_EQ(computed.size(), 2);
|
||||||
X = { 1, 3 };
|
X = { 1, 3 };
|
||||||
y = { 1, 2 };
|
y = { 1, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 0);
|
EXPECT_EQ(computed.size(), 2);
|
||||||
X = { 2, 4 };
|
X = { 2, 4 };
|
||||||
y = { 1, 2 };
|
y = { 1, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 0);
|
EXPECT_EQ(computed.size(), 2);
|
||||||
X = { 1, 2, 3 };
|
X = { 1, 2, 3 };
|
||||||
y = { 1, 2, 2 };
|
y = { 1, 2, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 1);
|
EXPECT_EQ(computed.size(), 3);
|
||||||
EXPECT_NEAR(computed[0], 1.5, precision);
|
EXPECT_NEAR(computed[0], 1, precision);
|
||||||
|
EXPECT_NEAR(computed[1], 1.5, precision);
|
||||||
|
EXPECT_NEAR(computed[2], 3, precision);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, TestArtificialDataset)
|
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||||
{
|
{
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
cutPoints_t expected = { 5.05f };
|
cutPoints_t expected = { 4.7, 5.05, 6.0 };
|
||||||
vector<precision_t> computed = getCutPoints();
|
vector<precision_t> computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), expected.size());
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||||
@@ -200,10 +208,10 @@ namespace mdlp {
|
|||||||
TEST_F(TestFImdlp, TestIris)
|
TEST_F(TestFImdlp, TestIris)
|
||||||
{
|
{
|
||||||
vector<cutPoints_t> expected = {
|
vector<cutPoints_t> expected = {
|
||||||
{5.45f, 5.75f},
|
{4.3, 5.45f, 5.75f, 7.9},
|
||||||
{2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
|
{2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4},
|
||||||
{2.45f, 4.75f, 5.05f},
|
{1, 2.45f, 4.75f, 5.05f, 6.9},
|
||||||
{0.8f, 1.75f}
|
{0.1, 0.8f, 1.75f, 2.5}
|
||||||
};
|
};
|
||||||
vector<int> depths = { 3, 5, 4, 3 };
|
vector<int> depths = { 3, 5, 4, 3 };
|
||||||
auto test = CPPFImdlp();
|
auto test = CPPFImdlp();
|
||||||
@@ -213,7 +221,7 @@ namespace mdlp {
|
|||||||
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t expected;
|
cutPoints_t expected;
|
||||||
expected = { 1.5 };
|
expected = { 0, 1.5, 2 };
|
||||||
samples_t X_ = { 0, 1, 2, 2, 2 };
|
samples_t X_ = { 0, 1, 2, 2, 2 };
|
||||||
labels_t y_ = { 1, 1, 1, 2, 2 };
|
labels_t y_ = { 1, 1, 1, 2, 2 };
|
||||||
fit(X_, y_);
|
fit(X_, y_);
|
||||||
@@ -247,10 +255,10 @@ namespace mdlp {
|
|||||||
// Set max_depth to 1
|
// Set max_depth to 1
|
||||||
auto test = CPPFImdlp(3, 1, 0);
|
auto test = CPPFImdlp(3, 1, 0);
|
||||||
vector<cutPoints_t> expected = {
|
vector<cutPoints_t> expected = {
|
||||||
{5.45f},
|
{4.3, 5.45f, 7.9},
|
||||||
{3.35f},
|
{2, 3.35f, 4.4},
|
||||||
{2.45f},
|
{1, 2.45f, 6.9},
|
||||||
{0.8f}
|
{0.1, 0.8f, 2.5}
|
||||||
};
|
};
|
||||||
vector<int> depths = { 1, 1, 1, 1 };
|
vector<int> depths = { 1, 1, 1, 1 };
|
||||||
test_dataset(test, "iris", expected, depths);
|
test_dataset(test, "iris", expected, depths);
|
||||||
@@ -261,10 +269,10 @@ namespace mdlp {
|
|||||||
auto test = CPPFImdlp(75, 100, 0);
|
auto test = CPPFImdlp(75, 100, 0);
|
||||||
// Set min_length to 75
|
// Set min_length to 75
|
||||||
vector<cutPoints_t> expected = {
|
vector<cutPoints_t> expected = {
|
||||||
{5.45f, 5.75f},
|
{4.3, 5.45f, 5.75f, 7.9},
|
||||||
{2.85f, 3.35f},
|
{2, 2.85f, 3.35f, 4.4},
|
||||||
{2.45f, 4.75f},
|
{1, 2.45f, 4.75f, 6.9},
|
||||||
{0.8f, 1.75f}
|
{0.1, 0.8f, 1.75f, 2.5}
|
||||||
};
|
};
|
||||||
vector<int> depths = { 3, 2, 2, 2 };
|
vector<int> depths = { 3, 2, 2, 2 };
|
||||||
test_dataset(test, "iris", expected, depths);
|
test_dataset(test, "iris", expected, depths);
|
||||||
@@ -275,10 +283,10 @@ namespace mdlp {
|
|||||||
// Set min_length to 75
|
// Set min_length to 75
|
||||||
auto test = CPPFImdlp(75, 2, 0);
|
auto test = CPPFImdlp(75, 2, 0);
|
||||||
vector<cutPoints_t> expected = {
|
vector<cutPoints_t> expected = {
|
||||||
{5.45f, 5.75f},
|
{4.3, 5.45f, 5.75f, 7.9},
|
||||||
{2.85f, 3.35f},
|
{2, 2.85f, 3.35f, 4.4},
|
||||||
{2.45f, 4.75f},
|
{1, 2.45f, 4.75f, 6.9},
|
||||||
{0.8f, 1.75f}
|
{0.1, 0.8f, 1.75f, 2.5}
|
||||||
};
|
};
|
||||||
vector<int> depths = { 2, 2, 2, 2 };
|
vector<int> depths = { 2, 2, 2, 2 };
|
||||||
test_dataset(test, "iris", expected, depths);
|
test_dataset(test, "iris", expected, depths);
|
||||||
@@ -289,10 +297,10 @@ namespace mdlp {
|
|||||||
// Set min_length to 75
|
// Set min_length to 75
|
||||||
auto test = CPPFImdlp(75, 2, 1);
|
auto test = CPPFImdlp(75, 2, 1);
|
||||||
vector<cutPoints_t> expected = {
|
vector<cutPoints_t> expected = {
|
||||||
{5.45f},
|
{4.3, 5.45f, 7.9},
|
||||||
{2.85f},
|
{2, 2.85f, 4.4},
|
||||||
{2.45f},
|
{1, 2.45f, 6.9},
|
||||||
{0.8f}
|
{0.1, 0.8f, 2.5}
|
||||||
};
|
};
|
||||||
vector<int> depths = { 2, 2, 2, 2 };
|
vector<int> depths = { 2, 2, 2, 2 };
|
||||||
test_dataset(test, "iris", expected, depths);
|
test_dataset(test, "iris", expected, depths);
|
||||||
@@ -304,10 +312,10 @@ namespace mdlp {
|
|||||||
// Set min_length to 75
|
// Set min_length to 75
|
||||||
auto test = CPPFImdlp(75, 2, 0.2f);
|
auto test = CPPFImdlp(75, 2, 0.2f);
|
||||||
vector<cutPoints_t> expected = {
|
vector<cutPoints_t> expected = {
|
||||||
{5.45f, 5.75f},
|
{4.3, 5.45f, 5.75f, 7.9},
|
||||||
{2.85f, 3.35f},
|
{2, 2.85f, 3.35f, 4.4},
|
||||||
{2.45f, 4.75f},
|
{1, 2.45f, 4.75f, 6.9},
|
||||||
{0.8f, 1.75f}
|
{0.1, 0.8f, 1.75f, 2.5}
|
||||||
};
|
};
|
||||||
vector<int> depths = { 2, 2, 2, 2 };
|
vector<int> depths = { 2, 2, 2, 2 };
|
||||||
test_dataset(test, "iris", expected, depths);
|
test_dataset(test, "iris", expected, depths);
|
||||||
@@ -327,7 +335,6 @@ namespace mdlp {
|
|||||||
computed = compute_max_num_cut_points();
|
computed = compute_max_num_cut_points();
|
||||||
ASSERT_EQ(expected, computed);
|
ASSERT_EQ(expected, computed);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, TransformTest)
|
TEST_F(TestFImdlp, TransformTest)
|
||||||
{
|
{
|
||||||
@@ -350,5 +357,10 @@ namespace mdlp {
|
|||||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||||
EXPECT_EQ(computed[i], expected[i]);
|
EXPECT_EQ(computed[i], expected[i]);
|
||||||
}
|
}
|
||||||
|
auto computed_ft = fit_transform(X[1], y);
|
||||||
|
EXPECT_EQ(computed_ft.size(), expected.size());
|
||||||
|
for (unsigned long i = 0; i < computed_ft.size(); i++) {
|
||||||
|
EXPECT_EQ(computed_ft[i], expected[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,14 +1,20 @@
|
|||||||
|
// ****************************************************************
|
||||||
|
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX - FileType: SOURCE
|
||||||
|
// SPDX - License - Identifier: MIT
|
||||||
|
// ****************************************************************
|
||||||
|
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include "../Metrics.h"
|
#include "Metrics.h"
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class TestMetrics: public Metrics, public testing::Test {
|
class TestMetrics : public Metrics, public testing::Test {
|
||||||
public:
|
public:
|
||||||
labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||||
precision_t precision = 0.000001f;
|
precision_t precision = 1e-6;
|
||||||
|
|
||||||
TestMetrics(): Metrics(y_, indices_) {};
|
TestMetrics() : Metrics(y_, indices_) {};
|
||||||
|
|
||||||
void SetUp() override
|
void SetUp() override
|
||||||
{
|
{
|
||||||
|
222
tests/datasets/tests.txt
Normal file
222
tests/datasets/tests.txt
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
#
|
||||||
|
# from, to, step, #bins, Q/U
|
||||||
|
# discretized data
|
||||||
|
# cut points
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Range experiments
|
||||||
|
#
|
||||||
|
RANGE
|
||||||
|
0, 100, 1, 4, Q
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||||
|
0.0, 24.75, 49.5, 74.25, 99.0
|
||||||
|
RANGE
|
||||||
|
0, 50, 1, 4, Q
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||||
|
0.0, 12.25, 24.5, 36.75, 49.0
|
||||||
|
RANGE
|
||||||
|
0, 100, 1, 3, Q
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
0.0, 33.0, 66.0, 99.0
|
||||||
|
RANGE
|
||||||
|
0, 50, 1, 3, Q
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
0.0, 16.33333, 32.66667, 49.0
|
||||||
|
RANGE
|
||||||
|
0, 10, 1, 3, Q
|
||||||
|
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
0.0, 3.0, 6.0, 9.0
|
||||||
|
RANGE
|
||||||
|
0, 100, 1, 4, U
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||||
|
0.0, 24.75, 49.5, 74.25, 99.0
|
||||||
|
RANGE
|
||||||
|
0, 50, 1, 4, U
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||||
|
0.0, 12.25, 24.5, 36.75, 49.0
|
||||||
|
RANGE
|
||||||
|
0, 100, 1, 3, U
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
0.0, 33.0, 66.0, 99.0
|
||||||
|
RANGE
|
||||||
|
0, 50, 1, 3, U
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
0.0, 16.33333, 32.66667, 49.0
|
||||||
|
RANGE
|
||||||
|
0, 10, 1, 3, U
|
||||||
|
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
0.0, 3.0, 6.0, 9.0
|
||||||
|
RANGE
|
||||||
|
1, 10, 1, 3, Q
|
||||||
|
0, 0, 0, 1, 1, 1, 2, 2, 2
|
||||||
|
1.0, 3.66667, 6.33333, 9.0
|
||||||
|
RANGE
|
||||||
|
1, 10, 1, 3, U
|
||||||
|
0, 0, 0, 1, 1, 1, 2, 2, 2
|
||||||
|
1.0, 3.66667, 6.33333, 9.0
|
||||||
|
RANGE
|
||||||
|
1, 11, 1, 3, Q
|
||||||
|
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.0, 7.0, 10.0
|
||||||
|
RANGE
|
||||||
|
1, 11, 1, 3, U
|
||||||
|
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.0, 7.0, 10.0
|
||||||
|
RANGE
|
||||||
|
1, 12, 1, 3, Q
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.33333, 7.66667, 11.0
|
||||||
|
RANGE
|
||||||
|
1, 12, 1, 3, U
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.33333, 7.66667, 11.0
|
||||||
|
RANGE
|
||||||
|
1, 13, 1, 3, Q
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.66667, 8.33333, 12.0
|
||||||
|
RANGE
|
||||||
|
1, 13, 1, 3, U
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.66667, 8.33333, 12.0
|
||||||
|
RANGE
|
||||||
|
1, 14, 1, 3, Q
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.0, 9.0, 13.0
|
||||||
|
RANGE
|
||||||
|
1, 14, 1, 3, U
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.0, 9.0, 13.0
|
||||||
|
RANGE
|
||||||
|
1, 15, 1, 3, Q
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.33333, 9.66667, 14.0
|
||||||
|
RANGE
|
||||||
|
1, 15, 1, 3, U
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.33333, 9.66667, 14.0
|
||||||
|
#
|
||||||
|
# Vector experiments
|
||||||
|
#
|
||||||
|
VECTOR
|
||||||
|
Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||||
|
1, 0, 0, 1, 0, 0, 1, 0, 0
|
||||||
|
1.0, 1.66667, 3.0
|
||||||
|
VECTOR
|
||||||
|
U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||||
|
2, 0, 0, 2, 0, 0, 2, 0, 0
|
||||||
|
1.0, 1.66667, 2.33333, 3.0
|
||||||
|
VECTOR
|
||||||
|
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.66667, 8.33333, 12.0
|
||||||
|
VECTOR
|
||||||
|
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
1.0, 4.66667, 8.33333, 12.0
|
||||||
|
VECTOR
|
||||||
|
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.0, 9.0, 13.0
|
||||||
|
VECTOR
|
||||||
|
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||||
|
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.0, 9.0, 13.0
|
||||||
|
VECTOR
|
||||||
|
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.33333, 9.66667, 14.0
|
||||||
|
VECTOR
|
||||||
|
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.33333, 9.66667, 14.0
|
||||||
|
VECTOR
|
||||||
|
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.66667, 10.33333, 15.0
|
||||||
|
VECTOR
|
||||||
|
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||||
|
1.0, 5.66667, 10.33333, 15.0
|
||||||
|
VECTOR
|
||||||
|
Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||||
|
2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
|
||||||
|
1.0, 5.66667, 10.33333, 15.0
|
||||||
|
VECTOR
|
||||||
|
U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||||
|
2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
|
||||||
|
1.0, 5.66667, 10.33333, 15.0
|
||||||
|
VECTOR
|
||||||
|
Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||||
|
0, 1, 1, 1, 1, 1, 2, 2, 2, 2
|
||||||
|
0.0, 1.0, 3.0, 4.0
|
||||||
|
VECTOR
|
||||||
|
U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||||
|
0, 0, 0, 0, 1, 1, 2, 2, 2, 2
|
||||||
|
0.0, 1.33333, 2.66667, 4.0
|
||||||
|
#
|
||||||
|
# Vector experiments with iris
|
||||||
|
#
|
||||||
|
VECTOR
|
||||||
|
Q3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||||
|
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1
|
||||||
|
4.3, 5.4, 6.3, 7.9
|
||||||
|
VECTOR
|
||||||
|
U3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 2, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1
|
||||||
|
4.3, 5.5, 6.7, 7.9
|
||||||
|
VECTOR
|
||||||
|
Q4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||||
|
1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2
|
||||||
|
4.3, 5.1, 5.8, 6.4, 7.9
|
||||||
|
VECTOR
|
||||||
|
U4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||||
|
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
|
||||||
|
4.3, 5.2, 6.1, 7.0, 7.9
|
||||||
|
VECTOR
|
||||||
|
Q3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||||
|
2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 1, 0, 0, 2, 1, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1
|
||||||
|
2.0, 2.9, 3.2, 4.4
|
||||||
|
VECTOR
|
||||||
|
U3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||||
|
1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
|
||||||
|
2.0, 2.8, 3.6, 4.4
|
||||||
|
VECTOR
|
||||||
|
Q4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||||
|
3, 2, 2, 2, 3, 3, 3, 3, 1, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 0, 2, 3, 3, 2, 3, 2, 3, 3, 2, 2, 2, 0, 1, 1, 3, 0, 1, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 0, 0, 2, 3, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 1, 0, 1, 3, 0, 2, 1, 2, 2, 0, 1, 0, 3, 2, 0, 2, 0, 1, 2, 2, 3, 0, 0, 2, 1, 1, 0, 3, 2, 1, 2, 1, 2, 1, 3, 1, 1, 0, 2, 3, 2, 2, 2, 2, 2, 0, 2, 3, 2, 0, 2, 3, 2
|
||||||
|
2.0, 2.8, 3.0, 3.3, 4.4
|
||||||
|
VECTOR
|
||||||
|
U4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||||
|
2, 1, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 3, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 3, 1, 3, 2, 2, 2, 2, 2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, 1, 3, 1, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 1
|
||||||
|
2.0, 2.6, 3.2, 3.8, 4.4
|
||||||
|
VECTOR
|
||||||
|
Q3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
1.0, 2.63333, 4.9, 6.9
|
||||||
|
VECTOR
|
||||||
|
U3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
1.0, 2.96667, 4.93333, 6.9
|
||||||
|
VECTOR
|
||||||
|
Q4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||||
|
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3
|
||||||
|
1.0, 1.6, 4.35, 5.1, 6.9
|
||||||
|
VECTOR
|
||||||
|
U4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 2, 2, 2, 2, 2
|
||||||
|
1.0, 2.475, 3.95, 5.425, 6.9
|
||||||
|
VECTOR
|
||||||
|
Q3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
0.1, 0.86667, 1.6, 2.5
|
||||||
|
VECTOR
|
||||||
|
U3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||||
|
0.1, 0.9, 1.7, 2.5
|
||||||
|
VECTOR
|
||||||
|
Q4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||||
|
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||||
|
0.1, 0.3, 1.3, 1.8, 2.5
|
||||||
|
VECTOR
|
||||||
|
U4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2
|
||||||
|
0.1, 0.7, 1.3, 1.9, 2.5
|
1
tests/lib/Files
Submodule
1
tests/lib/Files
Submodule
Submodule tests/lib/Files added at a5316928d4
18
tests/test
18
tests/test
@@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
if [ -d build ] ; then
|
|
||||||
rm -fr build
|
|
||||||
fi
|
|
||||||
if [ -d gcovr-report ] ; then
|
|
||||||
rm -fr gcovr-report
|
|
||||||
fi
|
|
||||||
cmake -S . -B build -Wno-dev
|
|
||||||
cmake --build build
|
|
||||||
cd build
|
|
||||||
ctest --output-on-failure
|
|
||||||
cd ..
|
|
||||||
mkdir gcovr-report
|
|
||||||
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
|
||||||
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
|
|
||||||
#lcov --list lcoverage/main_coverage.info
|
|
||||||
cd ..
|
|
||||||
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
|
|
@@ -1,404 +0,0 @@
|
|||||||
from scipy.io.arff import loadarff
|
|
||||||
from sklearn.preprocessing import KBinsDiscretizer
|
|
||||||
|
|
||||||
|
|
||||||
def test(clf, X, expected, title):
|
|
||||||
X = [[x] for x in X]
|
|
||||||
clf.fit(X)
|
|
||||||
computed = [int(x[0]) for x in clf.transform(X)]
|
|
||||||
print(f"{title}")
|
|
||||||
print(f"{computed=}")
|
|
||||||
print(f"{expected=}")
|
|
||||||
assert computed == expected
|
|
||||||
print("-" * 80)
|
|
||||||
|
|
||||||
|
|
||||||
# Test Uniform Strategy
|
|
||||||
clf3u = KBinsDiscretizer(
|
|
||||||
n_bins=3, encode="ordinal", strategy="uniform", subsample=200_000
|
|
||||||
)
|
|
||||||
clf3q = KBinsDiscretizer(
|
|
||||||
n_bins=3, encode="ordinal", strategy="quantile", subsample=200_000
|
|
||||||
)
|
|
||||||
clf4u = KBinsDiscretizer(
|
|
||||||
n_bins=4, encode="ordinal", strategy="uniform", subsample=200_000
|
|
||||||
)
|
|
||||||
clf4q = KBinsDiscretizer(
|
|
||||||
n_bins=4, encode="ordinal", strategy="quantile", subsample=200_000
|
|
||||||
)
|
|
||||||
#
|
|
||||||
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
|
|
||||||
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2]
|
|
||||||
test(clf3u, X, labels, title="Easy3BinsUniform")
|
|
||||||
test(clf3q, X, labels, title="Easy3BinsQuantile")
|
|
||||||
#
|
|
||||||
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
|
|
||||||
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
|
|
||||||
# En C++ se obtiene el mismo resultado en ambos, no como aquí
|
|
||||||
labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]
|
|
||||||
test(clf3u, X, labels, title="X10BinsUniform")
|
|
||||||
test(clf3q, X, labels2, title="X10BinsQuantile")
|
|
||||||
#
|
|
||||||
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
|
|
||||||
labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
|
|
||||||
# En C++ se obtiene el mismo resultado en ambos, no como aquí
|
|
||||||
# labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]
|
|
||||||
test(clf3u, X, labels, title="X11BinsUniform")
|
|
||||||
test(clf3q, X, labels, title="X11BinsQuantile")
|
|
||||||
#
|
|
||||||
X = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
|
|
||||||
labels = [0, 0, 0, 0, 0, 0]
|
|
||||||
test(clf3u, X, labels, title="ConstantUniform")
|
|
||||||
test(clf3q, X, labels, title="ConstantQuantile")
|
|
||||||
#
|
|
||||||
X = [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
|
||||||
labels = [2, 0, 0, 2, 0, 0, 2, 0, 0]
|
|
||||||
labels2 = [1, 0, 0, 1, 0, 0, 1, 0, 0] # igual que en C++
|
|
||||||
test(clf3u, X, labels, title="EasyRepeatedUniform")
|
|
||||||
test(clf3q, X, labels2, title="EasyRepeatedQuantile")
|
|
||||||
#
|
|
||||||
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
|
||||||
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
|
|
||||||
test(clf4u, X, labels, title="Easy4BinsUniform")
|
|
||||||
test(clf4q, X, labels, title="Easy4BinsQuantile")
|
|
||||||
#
|
|
||||||
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
|
||||||
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
|
|
||||||
test(clf4u, X, labels, title="X13BinsUniform")
|
|
||||||
test(clf4q, X, labels, title="X13BinsQuantile")
|
|
||||||
#
|
|
||||||
X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
|
||||||
labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
|
|
||||||
test(clf4u, X, labels, title="X14BinsUniform")
|
|
||||||
test(clf4q, X, labels, title="X14BinsQuantile")
|
|
||||||
#
|
|
||||||
X1 = [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
|
||||||
X2 = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
|
||||||
labels1 = [3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0]
|
|
||||||
labels2 = [3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0]
|
|
||||||
test(clf4u, X1, labels1, title="X15BinsUniform")
|
|
||||||
test(clf4q, X2, labels2, title="X15BinsQuantile")
|
|
||||||
#
|
|
||||||
X = [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
|
||||||
labels = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3]
|
|
||||||
test(clf4u, X, labels, title="RepeatedValuesUniform")
|
|
||||||
test(clf4q, X, labels, title="RepeatedValuesQuantile")
|
|
||||||
|
|
||||||
print(f"Uniform {clf4u.bin_edges_=}")
|
|
||||||
print(f"Quaintile {clf4q.bin_edges_=}")
|
|
||||||
print("-" * 80)
|
|
||||||
#
|
|
||||||
data, meta = loadarff("tests/datasets/iris.arff")
|
|
||||||
labelsu = [
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
0,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
]
|
|
||||||
labelsq = [
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
0,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
1,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
2,
|
|
||||||
2,
|
|
||||||
]
|
|
||||||
test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
|
|
||||||
test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
|
|
||||||
# print("Labels")
|
|
||||||
# print(labels)
|
|
||||||
# print("Expected")
|
|
||||||
# print(expected)
|
|
||||||
# for i in range(len(labels)):
|
|
||||||
# if labels[i] != expected[i]:
|
|
||||||
# print(f"Error at {i} {labels[i]} != {expected[i]}")
|
|
71
tests/tests_do.py
Normal file
71
tests/tests_do.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# ***************************************************************
|
||||||
|
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
# SPDX-FileType: SOURCE
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# ***************************************************************
|
||||||
|
|
||||||
|
import json
|
||||||
|
from sklearn.preprocessing import KBinsDiscretizer
|
||||||
|
|
||||||
|
# Data file produced by tests_generate.ipynb. After comment lines are removed
# it is read as consecutive groups of four lines:
#   1. experiment type ("RANGE" or anything else, treated as a vector)
#   2. experiment definition
#   3. expected discretized values, comma separated
#   4. expected cut points, comma separated
TESTS_FILE = "datasets/tests.txt"
LINES_PER_EXPERIMENT = 4
# Cut points are compared after rounding to this many decimals.
CUT_DECIMALS = 5


def load_records(path=TESTS_FILE):
    """Return the stripped, meaningful lines of the experiments file.

    Lines whose first character is "#" are comments. Blank lines are dropped
    as well, because a stray empty line would shift the four-line grouping.
    """
    with open(path, encoding="utf-8") as f:
        return [
            line.strip()
            for line in f
            if line.strip() and not line.startswith("#")
        ]


def parse_experiment(experiment_type, spec):
    """Decode one experiment definition.

    Args:
        experiment_type: "RANGE" for a ``from, to, step, bins, Q/U`` line;
            any other value is parsed as ``<Q|U><bins>[v1, v2, ...]``.
        spec: the definition line itself.

    Returns:
        A tuple ``(X, n_bins, strategy)`` where ``X`` is a list of
        single-element float rows, ``n_bins`` is an int and ``strategy`` is
        "quantile" when the flag is "Q" and "uniform" otherwise.

    Raises:
        ValueError: if the definition cannot be parsed.
    """
    if experiment_type == "RANGE":
        from_, to_, step_, n_bins_, strategy_ = spec.split(",")
        values = range(int(from_), int(to_), int(step_))
    else:
        # Everything between the strategy letter and the opening bracket is
        # the bin count, so counts with more than one digit are accepted.
        bracket = spec.index("[")
        strategy_ = spec[0]
        n_bins_ = spec[1:bracket]
        values = json.loads(spec[bracket:])
    X = [[float(x)] for x in values]
    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    return X, int(n_bins_), strategy


def check_experiment(experiment_type, spec, expected_data, cuts_data):
    """Run one experiment through KBinsDiscretizer and compare the results.

    Every mismatch is printed. Returns True when at least one discretized
    value or cut point differs from the stored one.

    Raises:
        ValueError: if the number of values or cut points differs from the
            stored ones.
    """
    X, n_bins, strategy = parse_experiment(experiment_type, spec)
    disc = KBinsDiscretizer(
        n_bins=n_bins,
        encode="ordinal",
        strategy=strategy,
    )
    disc.fit(X)
    #
    # Normalize the cutpoints to remove numerical errors such as 33.0000000001
    # instead of 33. This is done in place, before transform() uses them.
    #
    edges = disc.bin_edges_[0]
    for j, edge in enumerate(edges):
        edges[j] = round(edge, CUT_DECIMALS)
    result = [int(x) for x in disc.transform(X).flatten()]
    expected = [int(x) for x in expected_data.split(",")]
    #
    # Check the Results
    #
    if len(result) != len(expected):
        raise ValueError(
            f"{spec}: {len(result)} discretized values, {len(expected)} expected"
        )
    failed = False
    for j, (got, wanted) in enumerate(zip(result, expected)):
        if got != wanted:
            print("* Error at", j, "Expected=", wanted, "Result=", got)
            failed = True
    computed_cuts = [float(x) for x in cuts_data.split(",")]
    if len(edges) != len(computed_cuts):
        raise ValueError(
            f"{spec}: {len(edges)} cut points, {len(computed_cuts)} in the file"
        )
    for j, (edge, cut) in enumerate(zip(edges, computed_cuts)):
        if round(edge, CUT_DECIMALS) != cut:
            print(
                "* Error at",
                j,
                "Expected=",
                edge,
                "Result=",
                cut,
            )
            failed = True
    return failed


def main():
    """Check every experiment in the data file; raise if any of them differs."""
    records = load_records()
    if len(records) % LINES_PER_EXPERIMENT:
        raise ValueError(
            f"{TESTS_FILE}: expected groups of {LINES_PER_EXPERIMENT} lines, "
            f"found {len(records)} lines"
        )
    errors = False
    for i in range(0, len(records), LINES_PER_EXPERIMENT):
        experiment_type, spec, expected_data, cuts_data = records[
            i : i + LINES_PER_EXPERIMENT
        ]
        print("Experiment:", spec)
        if check_experiment(experiment_type, spec, expected_data, cuts_data):
            errors = True
    if errors:
        raise Exception("There were errors!")
    print("*** All tests run succesfully! ***")


if __name__ == "__main__":
    main()
|
209
tests/tests_generate.ipynb
Normal file
209
tests/tests_generate.ipynb
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.preprocessing import KBinsDiscretizer\n",
|
||||||
|
"from sklearn.datasets import load_iris"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiments_range = [\n",
|
||||||
|
" [0, 100, 1, 4, \"Q\"],\n",
|
||||||
|
" [0, 50, 1, 4, \"Q\"],\n",
|
||||||
|
" [0, 100, 1, 3, \"Q\"],\n",
|
||||||
|
" [0, 50, 1, 3, \"Q\"],\n",
|
||||||
|
" [0, 10, 1, 3, \"Q\"],\n",
|
||||||
|
" [0, 100, 1, 4, \"U\"],\n",
|
||||||
|
" [0, 50, 1, 4, \"U\"],\n",
|
||||||
|
" [0, 100, 1, 3, \"U\"],\n",
|
||||||
|
" [0, 50, 1, 3, \"U\"],\n",
|
||||||
|
"# \n",
|
||||||
|
" [0, 10, 1, 3, \"U\"],\n",
|
||||||
|
" [1, 10, 1, 3, \"Q\"],\n",
|
||||||
|
" [1, 10, 1, 3, \"U\"],\n",
|
||||||
|
" [1, 11, 1, 3, \"Q\"],\n",
|
||||||
|
" [1, 11, 1, 3, \"U\"],\n",
|
||||||
|
" [1, 12, 1, 3, \"Q\"],\n",
|
||||||
|
" [1, 12, 1, 3, \"U\"],\n",
|
||||||
|
" [1, 13, 1, 3, \"Q\"],\n",
|
||||||
|
" [1, 13, 1, 3, \"U\"],\n",
|
||||||
|
" [1, 14, 1, 3, \"Q\"],\n",
|
||||||
|
" [1, 14, 1, 3, \"U\"],\n",
|
||||||
|
" [1, 15, 1, 3, \"Q\"],\n",
|
||||||
|
" [1, 15, 1, 3, \"U\"]\n",
|
||||||
|
"]\n",
|
||||||
|
"experiments_vectors = [\n",
|
||||||
|
" (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),\n",
|
||||||
|
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),\n",
|
||||||
|
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),\n",
|
||||||
|
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),\n",
|
||||||
|
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),\n",
|
||||||
|
" (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),\n",
|
||||||
|
" (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:307: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"def write_lists(file, data, cuts):\n",
|
||||||
|
" sep = \"\"\n",
|
||||||
|
" for res in data:\n",
|
||||||
|
" file.write(f\"{sep}{int(res):d}\")\n",
|
||||||
|
" sep= \", \"\n",
|
||||||
|
" file.write(\"\\n\")\n",
|
||||||
|
" sep = \"\"\n",
|
||||||
|
" for res in cuts:\n",
|
||||||
|
" file.write(sep + str(round(res,5)))\n",
|
||||||
|
" sep = \", \"\n",
|
||||||
|
" file.write(\"\\n\")\n",
|
||||||
|
"\n",
|
||||||
|
"def normalize_cuts(cuts):\n",
|
||||||
|
" #\n",
|
||||||
|
" # Normalize the cutpoints to remove numerical errors such as 33.0000000001\n",
|
||||||
|
" # instead of 33\n",
|
||||||
|
" #\n",
|
||||||
|
" for k in range(cuts.shape[0]):\n",
|
||||||
|
" for i in range(len(cuts[k])):\n",
|
||||||
|
" cuts[k][i] = round(cuts[k][i], 5)\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"datasets/tests.txt\", \"w\") as file:\n",
|
||||||
|
" file.write(\"#\\n\")\n",
|
||||||
|
" file.write(\"# from, to, step, #bins, Q/U\\n\")\n",
|
||||||
|
" file.write(\"# discretized data\\n\")\n",
|
||||||
|
" file.write(\"# cut points\\n\")\n",
|
||||||
|
" file.write(\"#\\n\")\n",
|
||||||
|
" #\n",
|
||||||
|
" # Range experiments\n",
|
||||||
|
" #\n",
|
||||||
|
" file.write(\"#\\n\")\n",
|
||||||
|
" file.write(\"# Range experiments\\n\")\n",
|
||||||
|
" file.write(\"#\\n\")\n",
|
||||||
|
" for experiment in experiments_range:\n",
|
||||||
|
" file.write(\"RANGE\\n\")\n",
|
||||||
|
" (from_, to_, step_, bins_, strategy) = experiment\n",
|
||||||
|
" disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n",
|
||||||
|
" data = [[x] for x in range(from_, to_, step_)]\n",
|
||||||
|
" disc.fit(data)\n",
|
||||||
|
" normalize_cuts(disc.bin_edges_)\n",
|
||||||
|
" result = disc.transform(data)\n",
|
||||||
|
" file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n",
|
||||||
|
" write_lists(file, result, disc.bin_edges_[0])\n",
|
||||||
|
" #\n",
|
||||||
|
" # Vector experiments\n",
|
||||||
|
" #\n",
|
||||||
|
" file.write(\"#\\n\")\n",
|
||||||
|
" file.write(\"# Vector experiments\\n\")\n",
|
||||||
|
" file.write(\"#\\n\")\n",
|
||||||
|
" for n_bins, experiment in experiments_vectors:\n",
|
||||||
|
" for strategy in [\"Q\", \"U\"]:\n",
|
||||||
|
" file.write(\"VECTOR\\n\")\n",
|
||||||
|
" file.write(f\"{strategy}{n_bins}{experiment}\\n\")\n",
|
||||||
|
" disc = KBinsDiscretizer(\n",
|
||||||
|
" n_bins=n_bins,\n",
|
||||||
|
" encode=\"ordinal\",\n",
|
||||||
|
" \n",
|
||||||
|
" strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\",\n",
|
||||||
|
" )\n",
|
||||||
|
" data = [[x] for x in experiment]\n",
|
||||||
|
" disc.fit(data)\n",
|
||||||
|
" normalize_cuts(disc.bin_edges_)\n",
|
||||||
|
" result = disc.transform(data)\n",
|
||||||
|
" write_lists(file, result, disc.bin_edges_[0])\n",
|
||||||
|
" #\n",
|
||||||
|
" # Vector experiments iris\n",
|
||||||
|
" #\n",
|
||||||
|
" file.write(\"#\\n\");\n",
|
||||||
|
" file.write(\"# Vector experiments with iris\\n\");\n",
|
||||||
|
" file.write(\"#\\n\");\n",
|
||||||
|
" X, y = load_iris(return_X_y=True)\n",
|
||||||
|
" for i in range(X.shape[1]):\n",
|
||||||
|
" for n_bins in [3, 4]:\n",
|
||||||
|
" for strategy in [\"Q\", \"U\"]:\n",
|
||||||
|
" file.write(\"VECTOR\\n\")\n",
|
||||||
|
" experiment = X[:, i]\n",
|
||||||
|
" file.write(f\"{strategy}{n_bins}{experiment.tolist()}\\n\")\n",
|
||||||
|
" disc = KBinsDiscretizer(\n",
|
||||||
|
" n_bins=n_bins,\n",
|
||||||
|
" encode=\"ordinal\",\n",
|
||||||
|
" strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\")\n",
|
||||||
|
" data = [[x] for x in experiment]\n",
|
||||||
|
" disc.fit(data)\n",
|
||||||
|
" normalize_cuts(disc.bin_edges_)\n",
|
||||||
|
" result = disc.transform(data)\n",
|
||||||
|
" write_lists(file, result, disc.bin_edges_[0])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Cut points: [array([ 0., 33., 66., 99.])]\n",
|
||||||
|
"Mistaken transformed data disc.transform([[33]]) = [[0.]]\n",
|
||||||
|
"Reason of the mistake the cutpoint has decimals (double): 33.00000000000001\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#\n",
|
||||||
|
"# Proving the mistakes due to floating point precision\n",
|
||||||
|
"#\n",
|
||||||
|
"from sklearn.preprocessing import KBinsDiscretizer\n",
|
||||||
|
"\n",
|
||||||
|
"data = [[x] for x in range(100)]\n",
|
||||||
|
"disc = KBinsDiscretizer(n_bins=3, encode=\"ordinal\", strategy=\"quantile\")\n",
|
||||||
|
"disc.fit(data)\n",
|
||||||
|
"print(\"Cut points: \", disc.bin_edges_)\n",
|
||||||
|
"print(\"Mistaken transformed data disc.transform([[33]]) =\", disc.transform([[33]]))\n",
|
||||||
|
"print(\"Reason of the mistake the cutpoint has decimals (double): \", disc.bin_edges_[0][1])"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "base",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.1.undefined"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
38
update_coverage.py
Normal file
38
update_coverage.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# ***************************************************************
|
||||||
|
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
# SPDX-FileType: SOURCE
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# ***************************************************************
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
readme_file = "README.md"
# Text that identifies the README line holding the coverage badge.
BADGE_MARKER = "img.shields.io/badge/Coverage"
# The badge is only refreshed when line coverage reaches this percentage.
MIN_COVERAGE = 90


def parse_line_coverage(summary):
    """Return the line-coverage percentage found in ``lcov --summary`` output.

    The first line containing "lines" is used: the number between the first
    ":" and the following "%" is returned as a float. Returns 0 when no such
    line exists.
    """
    for line in summary.splitlines():
        if "lines" in line:
            return float(line.split(":")[1].split("%")[0])
    return 0


def build_badge_line(percentage):
    """Return the README markdown line showing *percentage* as a badge.

    The line must contain BADGE_MARKER, otherwise the next run would no
    longer find the badge it is supposed to replace.
    """
    # The decimal point is written as a comma in the badge text.
    percentage_label = str(percentage).replace(".", ",")
    # NOTE(review): alt text and colour are reconstructed from the marker and
    # the label; confirm them against the badge currently in README.md.
    # "%25" is the URL-encoded percent sign.
    return (
        f"[![Coverage Badge](https://{BADGE_MARKER}-{percentage_label}%25-green)]"
        "(html/index.html)"
    )


def main():
    """Read coverage from <dir>/coverage.info and refresh the README badge."""
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <directory containing coverage.info>")
    print("Updating coverage...")
    # Generate badge line
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being reinterpreted.
    output = subprocess.check_output(
        ["lcov", "--summary", sys.argv[1] + "/coverage.info"],
    )
    percentage = parse_line_coverage(output.decode("utf-8").strip())
    print(f"Coverage: {percentage}%")
    if percentage < MIN_COVERAGE:
        print(f"⛔Coverage is less than {MIN_COVERAGE}%. I won't update the badge.")
        sys.exit(1)
    coverage_line = build_badge_line(percentage)
    # Update README.md
    with open(readme_file, "r", encoding="utf-8") as f:
        lines = f.readlines()
    with open(readme_file, "w", encoding="utf-8") as f:
        f.writelines(
            coverage_line + "\n" if BADGE_MARKER in line else line
            for line in lines
        )
    print(f"✅Coverage updated with value: {percentage}")


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user