diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 98bac15..959641c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,9 +13,10 @@ jobs: env: BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed steps: - - uses: actions/checkout@v4.1.6 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + submodules: recursive - name: Install sonar-scanner and build-wrapper uses: SonarSource/sonarcloud-github-c-cpp@v2 - name: Install lcov & gcovr @@ -28,17 +29,16 @@ jobs: unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip - name: Tests & build-wrapper run: | - cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DENABLE_TESTING=ON - build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release + cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON + build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Debug + cmake --build build -j 4 cd build - make - ctest -C Release --output-on-failure --test-dir tests - cd .. 
- gcovr -f CPPFImdlp.cpp -f Metrics.cpp -f BinDisc.cpp -f Discretizer.cpp --txt --sonarqube=coverage.xml + ctest -C Debug --output-on-failure -j 4 + gcovr -f ../src/CPPFImdlp.cpp -f ../src/Metrics.cpp -f ../src/BinDisc.cpp -f ../src/Discretizer.cpp --txt --sonarqube=coverage.xml - name: Run sonar-scanner env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} run: | - sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ - --define sonar.coverageReportPaths=coverage.xml + sonar-scanner --define sonar.cfamily.compile-commands="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ + --define sonar.coverageReportPaths=build/coverage.xml \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..b3c7ebd --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/lib/Files"] + path = tests/lib/Files + url = https://github.com/rmontanana/ArffFiles.git diff --git a/.vscode/launch.json b/.vscode/launch.json index fa381ef..1342f2d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,7 +8,7 @@ "name": "C++ Launch config", "type": "cppdbg", "request": "launch", - "program": "${workspaceFolder}/tests/build/Metrics_unittest", + "program": "${workspaceFolder}/tests/build/BinDisc_unittest", "cwd": "${workspaceFolder}/tests/build", "args": [], "launchCompleteCommand": "exec-run", diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f35cce..5559336 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,34 @@ cmake_minimum_required(VERSION 3.20) + project(mdlp) set(CMAKE_CXX_STANDARD 17) +cmake_policy(SET CMP0135 NEW) + find_package(Torch REQUIRED) -include_directories(${TORCH_INCLUDE_DIRS}) -add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp) -target_link_libraries(mdlp "${TORCH_LIBRARIES}") -add_subdirectory(sample) + +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-elide-constructors") +set(CMAKE_CXX_FLAGS_RELEASE 
"${CMAKE_CXX_FLAGS_RELEASE} -O3") +if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline") +endif() + if (ENABLE_TESTING) + MESSAGE("Debug mode") + enable_testing() + set(CODE_COVERAGE ON) + SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage") add_subdirectory(tests) +else(ENABLE_TESTING) + MESSAGE("Release mode") endif(ENABLE_TESTING) + + +add_subdirectory(sample) + +include_directories( + ${TORCH_INCLUDE_DIRS} + ${mdlp_SOURCE_DIR}/src +) + +add_library(mdlp src/CPPFImdlp.cpp src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp) +target_link_libraries(mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/Discretizer.cpp b/Discretizer.cpp deleted file mode 100644 index 9d637ca..0000000 --- a/Discretizer.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "Discretizer.h" - -namespace mdlp { - labels_t& Discretizer::transform(const samples_t& data) - { - discretizedData.clear(); - discretizedData.reserve(data.size()); - for (const precision_t& item : data) { - auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item); - discretizedData.push_back(upper - cutPoints.begin()); - } - return discretizedData; - } - labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_) - { - fit(X_, y_); - return transform(X_); - } - void Discretizer::fit_t(torch::Tensor& X_, torch::Tensor& y_) - { - auto num_elements = X_.numel(); - samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); - labels_t y(y_.data_ptr(), y_.data_ptr() + num_elements); - fit(X, y); - } - torch::Tensor Discretizer::transform_t(torch::Tensor& X_) - { - auto num_elements = X_.numel(); - samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); - auto result = transform(X); - return torch::tensor(result, torch::kInt32); - } - torch::Tensor Discretizer::fit_transform_t(torch::Tensor& X_, torch::Tensor& y_) - { - auto num_elements = X_.numel(); - samples_t X(X_.data_ptr(), X_.data_ptr() + 
num_elements); - labels_t y(y_.data_ptr(), y_.data_ptr() + num_elements); - auto result = fit_transform(X, y); - return torch::tensor(result, torch::kInt32); - } -} \ No newline at end of file diff --git a/Discretizer.h b/Discretizer.h deleted file mode 100644 index 9749af8..0000000 --- a/Discretizer.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef DISCRETIZER_H -#define DISCRETIZER_H - -#include -#include -#include -#include "typesFImdlp.h" - -namespace mdlp { - class Discretizer { - public: - Discretizer() = default; - virtual ~Discretizer() = default; - inline cutPoints_t getCutPoints() const { return cutPoints; }; - virtual void fit(samples_t& X_, labels_t& y_) = 0; - labels_t& transform(const samples_t& data); - labels_t& fit_transform(samples_t& X_, labels_t& y_); - void fit_t(torch::Tensor& X_, torch::Tensor& y_); - torch::Tensor transform_t(torch::Tensor& X_); - torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_); - static inline std::string version() { return "1.2.2"; }; - protected: - labels_t discretizedData = labels_t(); - cutPoints_t cutPoints; - }; -} -#endif diff --git a/Makefile b/Makefile index eb45104..c0e145b 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,32 @@ SHELL := /bin/bash .DEFAULT_GOAL := build .PHONY: build test +lcov := lcov build: @if [ -d build_release ]; then rm -fr build_release; fi @mkdir build_release @cmake -B build_release -S . -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF - @cmake --build build_release + @cmake --build build_release -j 8 test: - @echo "Testing..." - @cd tests && ./test + @if [ -d build_debug ]; then rm -fr build_debug; fi + @mkdir build_debug + @cmake -B build_debug -S . 
-DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON + @cmake --build build_debug -j 8 + @cd build_debug/tests && ctest --output-on-failure -j 8 + @cd build_debug/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --output-file coverage.info >/dev/null 2>&1; \ + $(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \ + $(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \ + $(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \ + $(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \ + $(lcov) --remove coverage.info 'gtest/*' --output-file coverage.info >/dev/null 2>&1; + @genhtml build_debug/tests/coverage.info --demangle-cpp --output-directory build_debug/tests/coverage --title "Discretizer mdlp Coverage Report" -s -k -f --legend + @echo "* Coverage report is generated at build_debug/tests/coverage/index.html" + @which python || (echo ">>> Please install python"; exit 1) + @if [ ! -f build_debug/tests/coverage.info ]; then \ + echo ">>> No coverage.info file found!"; \ + exit 1; \ + fi + @echo ">>> Updating coverage badge..." 
+ @env python update_coverage.py build_debug/tests \ No newline at end of file diff --git a/README.md b/README.md index a42900f..9bd53f9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ [![Build](https://github.com/rmontanana/mdlp/actions/workflows/build.yml/badge.svg)](https://github.com/rmontanana/mdlp/actions/workflows/build.yml) [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp) +[![Coverage Badge](https://img.shields.io/badge/Coverage-100,0%25-green)](html/index.html) # logo mdlp @@ -31,15 +32,14 @@ Other features: To run the sample, just execute the following commands: ```bash -cmake -B build -S . -cmake --build build -build/sample/sample -f iris -m 2 -build/sample/sample -h +make build +build_release/sample/sample -f iris -m 2 +build_release/sample/sample -h ``` ## Test -To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands: +To run the tests and see coverage (llvm with lcov and genhtml have to be installed), execute the following commands: ```bash make test diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 722f601..7eb8efb 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -2,5 +2,10 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_BUILD_TYPE Debug) -add_executable(sample sample.cpp ../tests/ArffFiles.cpp) +include_directories( + ${mdlp_SOURCE_DIR}/src + ${mdlp_SOURCE_DIR}/tests/lib/Files +) + +add_executable(sample sample.cpp ) target_link_libraries(sample mdlp "${TORCH_LIBRARIES}") diff --git a/sample/sample.cpp b/sample/sample.cpp index 376c407..465d92e 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -1,3 +1,9 @@ +// 
**************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include #include #include @@ -6,10 +12,10 @@ #include #include #include -#include "../Discretizer.h" -#include "../CPPFImdlp.h" -#include "../BinDisc.h" -#include "../tests/ArffFiles.h" +#include +#include "Discretizer.h" +#include "CPPFImdlp.h" +#include "BinDisc.h" const string PATH = "tests/datasets/"; @@ -144,7 +150,7 @@ void process_file(const string& path, const string& file_name, bool class_last, auto result = test.fit_transform_t(Xt, yt); std::cout << "Transformed data (torch)...: " << std::endl; for (int i = 130; i < 135; i++) { - std::cout << std::fixed << std::setprecision(1) << Xt[i].item() << " " << result[i].item() << std::endl; + std::cout << std::fixed << std::setprecision(1) << Xt[i].item() << " " << result[i].item() << std::endl; } auto disc = mdlp::BinDisc(3); auto res_v = disc.fit_transform(X[0], y); @@ -152,7 +158,7 @@ void process_file(const string& path, const string& file_name, bool class_last, auto res_t = disc.transform_t(Xt); std::cout << "Transformed data (BinDisc)...: " << std::endl; for (int i = 130; i < 135; i++) { - std::cout << std::fixed << std::setprecision(1) << Xt[i].item() << " " << res_v[i] << " " << res_t[i].item() << std::endl; + std::cout << std::fixed << std::setprecision(1) << Xt[i].item() << " " << res_v[i] << " " << res_t[i].item() << std::endl; } } diff --git a/sonar-project.properties b/sonar-project.properties index e38e8a9..7bdc121 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -3,7 +3,7 @@ sonar.organization=rmontanana # This is the name and version displayed in the SonarCloud UI. 
sonar.projectName=mdlp -sonar.projectVersion=1.2.1 +sonar.projectVersion=2.0.0 # sonar.test.exclusions=tests/** # sonar.tests=tests/ # sonar.coverage.exclusions=tests/**,sample/** @@ -11,4 +11,4 @@ sonar.projectVersion=1.2.1 #sonar.sources=. # Encoding of the source code. Default is default system encoding -sonar.sourceEncoding=UTF-8 +sonar.sourceEncoding=UTF-8 \ No newline at end of file diff --git a/BinDisc.cpp b/src/BinDisc.cpp similarity index 53% rename from BinDisc.cpp rename to src/BinDisc.cpp index 551192c..4f13b09 100644 --- a/BinDisc.cpp +++ b/src/BinDisc.cpp @@ -1,5 +1,10 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include -#include #include #include "BinDisc.h" #include @@ -20,12 +25,15 @@ namespace mdlp { // y is included for compatibility with the Discretizer interface cutPoints.clear(); if (X.empty()) { - cutPoints.push_back(std::numeric_limits::max()); + cutPoints.push_back(0.0); + cutPoints.push_back(0.0); return; } if (strategy == strategy_t::QUANTILE) { + direction = bound_dir_t::RIGHT; fit_quantile(X); } else if (strategy == strategy_t::UNIFORM) { + direction = bound_dir_t::RIGHT; fit_uniform(X); } } @@ -35,65 +43,56 @@ namespace mdlp { } std::vector linspace(precision_t start, precision_t end, int num) { - // Doesn't include end point as it is not needed if (start == end) { - return { 0 }; + return { start, end }; } precision_t delta = (end - start) / static_cast(num - 1); std::vector linspc; - for (size_t i = 0; i < num - 1; ++i) { + for (size_t i = 0; i < num; ++i) { precision_t val = start + delta * static_cast(i); linspc.push_back(val); } return linspc; } - size_t clip(const size_t n, size_t lower, size_t upper) + size_t clip(const size_t n, const size_t lower, const size_t upper) { return std::max(lower, 
std::min(n, upper)); } - std::vector percentile(samples_t& data, std::vector& percentiles) + std::vector percentile(samples_t& data, const std::vector& percentiles) { // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html std::vector results; + bool first = true; results.reserve(percentiles.size()); for (auto percentile : percentiles) { - const size_t i = static_cast(std::floor(static_cast(data.size() - 1) * percentile / 100.)); + const auto i = static_cast(std::floor(static_cast(data.size() - 1) * percentile / 100.)); const auto indexLower = clip(i, 0, data.size() - 2); - const double percentI = static_cast(indexLower) / static_cast(data.size() - 1); - const double fraction = + const precision_t percentI = static_cast(indexLower) / static_cast(data.size() - 1); + const precision_t fraction = (percentile / 100.0 - percentI) / - (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); - const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; - if (value != results.back()) + (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); + if (const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; value != results.back() || first) // first needed as results.back() return is undefined for empty vectors results.push_back(value); + first = false; } return results; } - void BinDisc::fit_quantile(samples_t& X) + void BinDisc::fit_quantile(const samples_t& X) { auto quantiles = linspace(0.0, 100.0, n_bins + 1); auto data = X; std::sort(data.begin(), data.end()); if (data.front() == data.back() || data.size() == 1) { - // if X is constant - cutPoints.push_back(std::numeric_limits::max()); + // if X is constant, pass any two given points that shall be ignored in transform + cutPoints.push_back(data.front()); + cutPoints.push_back(data.front()); return; } cutPoints = percentile(data, quantiles); - normalizeCutPoints(); } - void 
BinDisc::fit_uniform(samples_t& X) + void BinDisc::fit_uniform(const samples_t& X) { - - auto minmax = std::minmax_element(X.begin(), X.end()); - cutPoints = linspace(*minmax.first, *minmax.second, n_bins + 1); - normalizeCutPoints(); - } - void BinDisc::normalizeCutPoints() - { - // Add max value to the end - cutPoints.push_back(std::numeric_limits::max()); - // Remove first as it is not needed - cutPoints.erase(cutPoints.begin()); + auto [vmin, vmax] = std::minmax_element(X.begin(), X.end()); + cutPoints = linspace(*vmin, *vmax, n_bins + 1); } } \ No newline at end of file diff --git a/BinDisc.h b/src/BinDisc.h similarity index 61% rename from BinDisc.h rename to src/BinDisc.h index d1bb94b..0a082be 100644 --- a/BinDisc.h +++ b/src/BinDisc.h @@ -1,3 +1,9 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #ifndef BINDISC_H #define BINDISC_H @@ -18,9 +24,8 @@ namespace mdlp { void fit(samples_t& X_, labels_t& y) override; void fit(samples_t& X); private: - void fit_uniform(samples_t&); - void fit_quantile(samples_t&); - void normalizeCutPoints(); + void fit_uniform(const samples_t&); + void fit_quantile(const samples_t&); int n_bins; strategy_t strategy; }; diff --git a/CPPFImdlp.cpp b/src/CPPFImdlp.cpp similarity index 89% rename from CPPFImdlp.cpp rename to src/CPPFImdlp.cpp index c2d4733..d972604 100644 --- a/CPPFImdlp.cpp +++ b/src/CPPFImdlp.cpp @@ -1,3 +1,9 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include #include #include @@ -12,6 +18,7 @@ namespace mdlp { max_depth(max_depth_), proposed_cuts(proposed) { 
+ direction = bound_dir_t::RIGHT; } size_t CPPFImdlp::compute_max_num_cut_points() const @@ -20,12 +27,12 @@ namespace mdlp { if (proposed_cuts == 0) { return numeric_limits::max(); } - if (proposed_cuts < 0 || proposed_cuts > static_cast(X.size())) { + if (proposed_cuts < 0 || proposed_cuts > static_cast(X.size())) { throw invalid_argument("wrong proposed num_cuts value"); } if (proposed_cuts < 1) - return static_cast(round(static_cast(X.size()) * proposed_cuts)); - return static_cast(proposed_cuts); + return static_cast(round(static_cast(X.size()) * proposed_cuts)); + return static_cast(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added } void CPPFImdlp::fit(samples_t& X_, labels_t& y_) @@ -58,6 +65,10 @@ namespace mdlp { resizeCutPoints(); } } + // Insert first & last X value to the cutpoints as them shall be ignored in transform + auto [vmin, vmax] = std::minmax_element(X.begin(), X.end()); + cutPoints.push_back(*vmax); + cutPoints.insert(cutPoints.begin(), *vmin); } pair CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) diff --git a/CPPFImdlp.h b/src/CPPFImdlp.h similarity index 82% rename from CPPFImdlp.h rename to src/CPPFImdlp.h index b832423..45fa65c 100644 --- a/CPPFImdlp.h +++ b/src/CPPFImdlp.h @@ -1,3 +1,9 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #ifndef CPPFIMDLP_H #define CPPFIMDLP_H diff --git a/src/Discretizer.cpp b/src/Discretizer.cpp new file mode 100644 index 0000000..1522fb2 --- /dev/null +++ b/src/Discretizer.cpp @@ -0,0 +1,54 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: 
MIT +// **************************************************************** + +#include "Discretizer.h" + +namespace mdlp { + + labels_t& Discretizer::transform(const samples_t& data) + { + discretizedData.clear(); + discretizedData.reserve(data.size()); + // CutPoints always have at least two items + // Have to ignore first and last cut points provided + auto first = cutPoints.begin() + 1; + auto last = cutPoints.end() - 1; + auto bound = direction == bound_dir_t::LEFT ? std::lower_bound::iterator, precision_t> : std::upper_bound::iterator, precision_t>; + for (const precision_t& item : data) { + auto pos = bound(first, last, item); + auto number = pos - first; + discretizedData.push_back(static_cast(number)); + } + return discretizedData; + } + labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_) + { + fit(X_, y_); + return transform(X_); + } + void Discretizer::fit_t(const torch::Tensor& X_, const torch::Tensor& y_) + { + auto num_elements = X_.numel(); + samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); + labels_t y(y_.data_ptr(), y_.data_ptr() + num_elements); + fit(X, y); + } + torch::Tensor Discretizer::transform_t(const torch::Tensor& X_) + { + auto num_elements = X_.numel(); + samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); + auto result = transform(X); + return torch::tensor(result, torch_label_t); + } + torch::Tensor Discretizer::fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_) + { + auto num_elements = X_.numel(); + samples_t X(X_.data_ptr(), X_.data_ptr() + num_elements); + labels_t y(y_.data_ptr(), y_.data_ptr() + num_elements); + auto result = fit_transform(X, y); + return torch::tensor(result, torch_label_t); + } +} \ No newline at end of file diff --git a/src/Discretizer.h b/src/Discretizer.h new file mode 100644 index 0000000..88db52a --- /dev/null +++ b/src/Discretizer.h @@ -0,0 +1,39 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 
Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + +#ifndef DISCRETIZER_H +#define DISCRETIZER_H + +#include +#include +#include "typesFImdlp.h" +#include + +namespace mdlp { + enum class bound_dir_t { + LEFT, + RIGHT + }; + const auto torch_label_t = torch::kInt32; + class Discretizer { + public: + Discretizer() = default; + virtual ~Discretizer() = default; + inline cutPoints_t getCutPoints() const { return cutPoints; }; + virtual void fit(samples_t& X_, labels_t& y_) = 0; + labels_t& transform(const samples_t& data); + labels_t& fit_transform(samples_t& X_, labels_t& y_); + void fit_t(const torch::Tensor& X_, const torch::Tensor& y_); + torch::Tensor transform_t(const torch::Tensor& X_); + torch::Tensor fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_); + static inline std::string version() { return "1.2.3"; }; + protected: + labels_t discretizedData = labels_t(); + cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform + bound_dir_t direction; // used in transform + }; +} +#endif diff --git a/Metrics.cpp b/src/Metrics.cpp similarity index 90% rename from Metrics.cpp rename to src/Metrics.cpp index f3405e9..b76fbcc 100644 --- a/Metrics.cpp +++ b/src/Metrics.cpp @@ -1,3 +1,9 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include "Metrics.h" #include #include diff --git a/Metrics.h b/src/Metrics.h similarity index 67% rename from Metrics.h rename to src/Metrics.h index 4f8151a..eea4d4b 100644 --- a/Metrics.h +++ b/src/Metrics.h @@ -1,3 +1,9 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 
2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #ifndef CCMETRICS_H #define CCMETRICS_H diff --git a/typesFImdlp.h b/src/typesFImdlp.h similarity index 86% rename from typesFImdlp.h rename to src/typesFImdlp.h index a0b6d54..0b5c5c7 100644 --- a/typesFImdlp.h +++ b/src/typesFImdlp.h @@ -8,8 +8,9 @@ using namespace std; namespace mdlp { typedef float precision_t; + typedef int label_t; typedef std::vector samples_t; - typedef std::vector labels_t; + typedef std::vector labels_t; typedef std::vector indices_t; typedef std::vector cutPoints_t; typedef std::map, precision_t> cacheEnt_t; diff --git a/tests/ArffFiles.cpp b/tests/ArffFiles.cpp deleted file mode 100644 index a95e244..0000000 --- a/tests/ArffFiles.cpp +++ /dev/null @@ -1,132 +0,0 @@ -#include "ArffFiles.h" -#include -#include -#include - -using namespace std; - -ArffFiles::ArffFiles() = default; - -vector ArffFiles::getLines() const -{ - return lines; -} - -unsigned long int ArffFiles::getSize() const -{ - return lines.size(); -} - -vector> ArffFiles::getAttributes() const -{ - return attributes; -} - -string ArffFiles::getClassName() const -{ - return className; -} - -string ArffFiles::getClassType() const -{ - return classType; -} - -vector& ArffFiles::getX() -{ - return X; -} - -vector& ArffFiles::getY() -{ - return y; -} - -void ArffFiles::load(const string& fileName, bool classLast) -{ - ifstream file(fileName); - if (!file.is_open()) { - throw invalid_argument("Unable to open file"); - } - string line; - string keyword; - string attribute; - string type; - string type_w; - while (getline(file, line)) { - if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { - continue; - } - if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { - stringstream ss(line); - ss >> keyword >> attribute; - type = ""; - while (ss >> type_w) - type += type_w + 
" "; - attributes.emplace_back(trim(attribute), trim(type)); - continue; - } - if (line[0] == '@') { - continue; - } - lines.push_back(line); - } - file.close(); - if (attributes.empty()) - throw invalid_argument("No attributes found"); - if (classLast) { - className = get<0>(attributes.back()); - classType = get<1>(attributes.back()); - attributes.pop_back(); - } else { - className = get<0>(attributes.front()); - classType = get<1>(attributes.front()); - attributes.erase(attributes.begin()); - } - generateDataset(classLast); - -} - -void ArffFiles::generateDataset(bool classLast) -{ - X = vector(attributes.size(), mdlp::samples_t(lines.size())); - auto yy = vector(lines.size(), ""); - int labelIndex = classLast ? static_cast(attributes.size()) : 0; - for (size_t i = 0; i < lines.size(); i++) { - stringstream ss(lines[i]); - string value; - int pos = 0; - int xIndex = 0; - while (getline(ss, value, ',')) { - if (pos++ == labelIndex) { - yy[i] = value; - } else { - X[xIndex++][i] = stof(value); - } - } - } - y = factorize(yy); -} - -string ArffFiles::trim(const string& source) -{ - string s(source); - s.erase(0, s.find_first_not_of(" '\n\r\t")); - s.erase(s.find_last_not_of(" '\n\r\t") + 1); - return s; -} - -vector ArffFiles::factorize(const vector& labels_t) -{ - vector yy; - yy.reserve(labels_t.size()); - map labelMap; - int i = 0; - for (const string& label : labels_t) { - if (labelMap.find(label) == labelMap.end()) { - labelMap[label] = i++; - } - yy.push_back(labelMap[label]); - } - return yy; -} \ No newline at end of file diff --git a/tests/ArffFiles.h b/tests/ArffFiles.h deleted file mode 100644 index f36d9d3..0000000 --- a/tests/ArffFiles.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef ARFFFILES_H -#define ARFFFILES_H - -#include -#include -#include "../typesFImdlp.h" - -using namespace std; - -class ArffFiles { -private: - vector lines; - vector> attributes; - string className; - string classType; - vector X; - vector y; - - void generateDataset(bool); - 
-public: - ArffFiles(); - void load(const string&, bool = true); - vector getLines() const; - unsigned long int getSize() const; - string getClassName() const; - string getClassType() const; - static string trim(const string&); - vector& getX(); - vector& getY(); - vector> getAttributes() const; - static vector factorize(const vector& labels_t); -}; - -#endif \ No newline at end of file diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index 2d4437c..e19c727 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -1,9 +1,16 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include #include #include #include "gtest/gtest.h" -#include "ArffFiles.h" -#include "../BinDisc.h" +#include +#include "BinDisc.h" +#include "Experiments.hpp" namespace mdlp { const float margin = 1e-4; @@ -40,10 +47,11 @@ namespace mdlp { auto y = labels_t(); fit(X, y); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(3.66667, cuts.at(0), margin); - EXPECT_NEAR(6.33333, cuts.at(1), margin); - EXPECT_EQ(numeric_limits::max(), cuts.at(2)); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(3.66667, cuts.at(1), margin); + EXPECT_NEAR(6.33333, cuts.at(2), margin); + EXPECT_NEAR(9.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -53,10 +61,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(3.666667, cuts[0], margin); - EXPECT_NEAR(6.333333, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts[0], margin); + EXPECT_NEAR(3.666667, 
cuts[1], margin); + EXPECT_NEAR(6.333333, cuts[2], margin); + EXPECT_NEAR(9, cuts[3], margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -66,10 +75,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_EQ(4.0, cuts[0]); - EXPECT_EQ(7.0, cuts[1]); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -79,10 +89,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_EQ(4, cuts[0]); - EXPECT_EQ(7, cuts[1]); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -92,10 +103,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(4.33333, cuts[0], margin); - EXPECT_NEAR(7.66667, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.33333, cuts.at(1), margin); + EXPECT_NEAR(7.66667, cuts.at(2), margin); + EXPECT_NEAR(11.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -105,10 +117,11 @@ namespace mdlp { 
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(4.33333, cuts[0], margin); - EXPECT_NEAR(7.66667, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.33333, cuts.at(1), margin); + EXPECT_NEAR(7.66667, cuts.at(2), margin); + EXPECT_NEAR(11.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -118,8 +131,9 @@ namespace mdlp { samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1, cuts.at(1), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 0, 0 }; EXPECT_EQ(expected, labels); @@ -129,8 +143,9 @@ namespace mdlp { samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1, cuts.at(1), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 0, 0 }; EXPECT_EQ(expected, labels); @@ -140,16 +155,18 @@ namespace mdlp { samples_t X = {}; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(0, cuts.at(0), margin); + EXPECT_NEAR(0, cuts.at(1), margin); } TEST_F(TestBinDisc3Q, EmptyQuantile) { samples_t X = {}; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(0, cuts.at(0), margin); + EXPECT_NEAR(0, cuts.at(1), margin); } TEST(TestBinDisc3, ExceptionNumberBins) { @@ -160,10 +177,11 @@ namespace 
mdlp { samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(1.66667, cuts[0], margin); - EXPECT_NEAR(2.33333, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1.66667, cuts.at(1), margin); + EXPECT_NEAR(2.33333, cuts.at(2), margin); + EXPECT_NEAR(3.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 }; EXPECT_EQ(expected, labels); @@ -174,9 +192,10 @@ namespace mdlp { samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(2, cuts.size()); - EXPECT_NEAR(1.66667, cuts[0], margin); - EXPECT_EQ(numeric_limits::max(), cuts[1]); + ASSERT_EQ(3, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1.66667, cuts.at(1), margin); + EXPECT_NEAR(3.0, cuts.at(2), margin); auto labels = transform(X); labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 }; EXPECT_EQ(expected, labels); @@ -187,11 +206,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - ASSERT_EQ(3.75, cuts[0]); - EXPECT_EQ(6.5, cuts[1]); - EXPECT_EQ(9.25, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(3.75, cuts.at(1), margin); + EXPECT_NEAR(6.5, cuts.at(2), margin); + EXPECT_NEAR(9.25, cuts.at(3), margin); + EXPECT_NEAR(12.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -201,11 +221,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - ASSERT_EQ(3.75, cuts[0]); - EXPECT_EQ(6.5, cuts[1]); - EXPECT_EQ(9.25, 
cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(3.75, cuts.at(1), margin); + EXPECT_NEAR(6.5, cuts.at(2), margin); + EXPECT_NEAR(9.25, cuts.at(3), margin); + EXPECT_NEAR(12.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -215,11 +236,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.0, cuts[0]); - EXPECT_EQ(7.0, cuts[1]); - EXPECT_EQ(10.0, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); + EXPECT_NEAR(13.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -229,11 +251,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.0, cuts[0]); - EXPECT_EQ(7.0, cuts[1]); - EXPECT_EQ(10.0, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); + EXPECT_NEAR(13.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -243,11 +266,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.25, cuts[0]); - EXPECT_EQ(7.5, cuts[1]); - 
EXPECT_EQ(10.75, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.25, cuts.at(1), margin); + EXPECT_NEAR(7.5, cuts.at(2), margin); + EXPECT_NEAR(10.75, cuts.at(3), margin); + EXPECT_NEAR(14.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -257,11 +281,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.25, cuts[0]); - EXPECT_EQ(7.5, cuts[1]); - EXPECT_EQ(10.75, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.25, cuts.at(1), margin); + EXPECT_NEAR(7.5, cuts.at(2), margin); + EXPECT_NEAR(10.75, cuts.at(3), margin); + EXPECT_NEAR(14.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -271,11 +296,12 @@ namespace mdlp { samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.5, cuts[0]); - EXPECT_EQ(8, cuts[1]); - EXPECT_EQ(11.5, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.5, cuts.at(1), margin); + EXPECT_NEAR(8, cuts.at(2), margin); + EXPECT_NEAR(11.5, cuts.at(3), margin); + EXPECT_NEAR(15.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 }; EXPECT_EQ(expected, labels); @@ -285,11 +311,12 @@ namespace mdlp { samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - 
EXPECT_EQ(4.5, cuts[0]); - EXPECT_EQ(8, cuts[1]); - EXPECT_EQ(11.5, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.5, cuts.at(1), margin); + EXPECT_NEAR(8, cuts.at(2), margin); + EXPECT_NEAR(11.5, cuts.at(3), margin); + EXPECT_NEAR(15.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 }; EXPECT_EQ(expected, labels); @@ -300,11 +327,12 @@ namespace mdlp { // 0 1 2 3 4 5 6 7 8 9 fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(1.0, cuts[0]); - EXPECT_EQ(2.0, cuts[1]); - ASSERT_EQ(3.0, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(0.0, cuts.at(0), margin); + EXPECT_NEAR(1.0, cuts.at(1), margin); + EXPECT_NEAR(2.0, cuts.at(2), margin); + EXPECT_NEAR(3.0, cuts.at(3), margin); + EXPECT_NEAR(4.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -315,50 +343,69 @@ namespace mdlp { // 0 1 2 3 4 5 6 7 8 9 fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_EQ(2.0, cuts[0]); - ASSERT_EQ(3.0, cuts[1]); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(0.0, cuts.at(0), margin); + EXPECT_NEAR(1.0, cuts.at(1), margin); + EXPECT_NEAR(2.0, cuts.at(2), margin); + EXPECT_NEAR(3.0, cuts.at(3), margin); + EXPECT_NEAR(4.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 }; + labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); } - TEST_F(TestBinDisc4U, irisUniform) + TEST(TestBinDiscGeneric, Fileset) { - ArffFiles file; - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - fit(X[0]); - auto Xt = transform(X[0]); - labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - EXPECT_EQ(expected, Xt); - auto Xtt = fit_transform(X[0], file.getY()); - EXPECT_EQ(expected, Xtt); - auto Xt_t = torch::tensor(X[0], torch::kFloat32); - auto y_t = torch::tensor(file.getY(), torch::kInt32); - auto Xtt_t = fit_transform_t(Xt_t, y_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xtt_t[i].item()); - } - TEST_F(TestBinDisc4Q, irisQuantile) - { - ArffFiles file; - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - fit(X[0]); - auto Xt = transform(X[0]); - labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; - EXPECT_EQ(expected, Xt); - auto Xtt = fit_transform(X[0], file.getY()); - EXPECT_EQ(expected, Xtt); - auto Xt_t = torch::tensor(X[0], torch::kFloat32); - auto y_t = torch::tensor(file.getY(), torch::kInt32); - auto Xtt_t = fit_transform_t(Xt_t, y_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xtt_t[i].item()); - fit_t(Xt_t, y_t); - auto Xt_t2 = transform_t(Xt_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xt_t2[i].item()); + Experiments exps(data_path + "tests.txt"); + int num = 0; + while (exps.is_next()) { + ++num; + Experiment exp = exps.next(); + BinDisc 
disc(exp.n_bins_, exp.strategy_[0] == 'Q' ? strategy_t::QUANTILE : strategy_t::UNIFORM); + std::vector test; + if (exp.type_ == experiment_t::RANGE) { + for (float i = exp.from_; i < exp.to_; i += exp.step_) { + test.push_back(i); + } + } else { + test = exp.dataset_; + } + // show_vector(test, "Test"); + auto empty = std::vector(); + auto Xt = disc.fit_transform(test, empty); + auto cuts = disc.getCutPoints(); + EXPECT_EQ(exp.discretized_data_.size(), Xt.size()); + auto flag = false; + size_t n_errors = 0; + if (num < 40) { + // + // Check discretization of only the first 40 tests as after we cannot ensure the same codification due to precision problems + // + for (int i = 0; i < exp.discretized_data_.size(); ++i) { + if (exp.discretized_data_.at(i) != Xt.at(i)) { + if (!flag) { + if (exp.type_ == experiment_t::RANGE) + std::cout << "+Exp #: " << num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl; + else { + std::cout << "+Exp #: " << num << " strategy: " << exp.strategy_ << " " << " n_bins: " << exp.n_bins_ << " "; + show_vector(exp.dataset_, "Dataset"); + } + show_vector(cuts, "Cuts"); + std::cout << "Error at " << i << " test[i]=" << test.at(i) << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl; + flag = true; + EXPECT_EQ(exp.discretized_data_.at(i), Xt.at(i)); + } + n_errors++; + } + } + if (flag) { + std::cout << "*** Found " << n_errors << " mistakes in this experiment dataset" << std::endl; + } + } + EXPECT_EQ(exp.cutpoints_.size(), cuts.size()); + for (int i = 0; i < exp.cutpoints_.size(); ++i) { + EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin); + } + } + std::cout << "* Number of experiments tested: " << num << std::endl; } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 64366f2..ac4723a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,3 @@ -cmake_minimum_required(VERSION 
3.20) -set(CMAKE_CXX_STANDARD 17) -cmake_policy(SET CMP0135 NEW) include(FetchContent) include_directories(${GTEST_INCLUDE_DIRS}) FetchContent_Declare( @@ -11,28 +8,30 @@ FetchContent_Declare( set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) -find_package(Torch REQUIRED) +include_directories( + ${TORCH_INCLUDE_DIRS} + ${mdlp_SOURCE_DIR}/src + ${mdlp_SOURCE_DIR}/tests/lib/Files +) -enable_testing() - -include_directories(${TORCH_INCLUDE_DIRS}) - -add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp) +add_executable(Metrics_unittest ${mdlp_SOURCE_DIR}/src/Metrics.cpp Metrics_unittest.cpp) target_link_libraries(Metrics_unittest GTest::gtest_main) target_compile_options(Metrics_unittest PRIVATE --coverage) target_link_options(Metrics_unittest PRIVATE --coverage) -add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp ../Discretizer.cpp) +add_executable(FImdlp_unittest FImdlp_unittest.cpp +${mdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${mdlp_SOURCE_DIR}/src/Metrics.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp) target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}") target_compile_options(FImdlp_unittest PRIVATE --coverage) target_link_options(FImdlp_unittest PRIVATE --coverage) -add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp ../Discretizer.cpp) +add_executable(BinDisc_unittest BinDisc_unittest.cpp ${mdlp_SOURCE_DIR}/src/BinDisc.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp) target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}") target_compile_options(BinDisc_unittest PRIVATE --coverage) target_link_options(BinDisc_unittest PRIVATE --coverage) -add_executable(Discretizer_unittest ../BinDisc.cpp ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp ../Discretizer.cpp Discretizer_unittest.cpp) +add_executable(Discretizer_unittest Discretizer_unittest.cpp +${mdlp_SOURCE_DIR}/src/BinDisc.cpp 
${mdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${mdlp_SOURCE_DIR}/src/Metrics.cpp ${mdlp_SOURCE_DIR}/src/Discretizer.cpp ) target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}") target_compile_options(Discretizer_unittest PRIVATE --coverage) target_link_options(Discretizer_unittest PRIVATE --coverage) diff --git a/tests/Discretizer_unittest.cpp b/tests/Discretizer_unittest.cpp index 8c8f201..70baada 100644 --- a/tests/Discretizer_unittest.cpp +++ b/tests/Discretizer_unittest.cpp @@ -1,11 +1,17 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include #include #include +#include #include "gtest/gtest.h" -#include "ArffFiles.h" -#include "../Discretizer.h" -#include "../BinDisc.h" -#include "../CPPFImdlp.h" +#include "Discretizer.h" +#include "BinDisc.h" +#include "CPPFImdlp.h" namespace mdlp { const float margin = 1e-4; @@ -20,7 +26,15 @@ namespace mdlp { return "../../tests/datasets/"; } const std::string data_path = set_data_path(); - + const labels_t iris_quantile = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; + TEST(Discretizer, Version) + { + Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM); + auto version = disc->version(); + delete disc; + std::cout << "Version computed: " << version; + EXPECT_EQ("1.2.3", version); + } TEST(Discretizer, BinIrisUniform) { ArffFiles file; @@ -43,12 +57,198 @@ namespace mdlp { 
auto y = labels_t(); disc->fit(X[0], y); auto Xt = disc->transform(X[0]); - labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; delete disc; - EXPECT_EQ(expected, Xt); + EXPECT_EQ(iris_quantile, Xt); } + + TEST(Discretizer, BinIrisQuantileTorch) + { + ArffFiles file; + Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE); + file.load(data_path + "iris.arff", true); + auto X = file.getX(); + auto y = file.getY(); + auto X_torch = torch::tensor(X[0], torch::kFloat32); + auto yt = torch::tensor(y, torch::kInt32); + disc->fit_t(X_torch, yt); + torch::Tensor Xt = disc->transform_t(X_torch); + delete disc; + EXPECT_EQ(iris_quantile.size(), Xt.size(0)); + for (int i = 0; i < iris_quantile.size(); ++i) { + EXPECT_EQ(iris_quantile.at(i), Xt[i].item()); + } + } + TEST(Discretizer, BinIrisQuantileTorchFit_transform) + { + ArffFiles file; + Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE); + file.load(data_path + "iris.arff", true); + auto X = file.getX(); + auto y = file.getY(); + auto X_torch = torch::tensor(X[0], torch::kFloat32); + auto yt = torch::tensor(y, torch::kInt32); + torch::Tensor Xt = disc->fit_transform_t(X_torch, yt); + delete disc; + EXPECT_EQ(iris_quantile.size(), Xt.size(0)); + for (int i = 0; i < iris_quantile.size(); ++i) { + EXPECT_EQ(iris_quantile.at(i), Xt[i].item()); + } + } + TEST(Discretizer, FImdlpIris) { + auto labelsq = { + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 1, + 
0, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 3, + 3, + 3, + 1, + 3, + 1, + 2, + 0, + 3, + 1, + 0, + 2, + 2, + 2, + 1, + 3, + 1, + 2, + 2, + 1, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 2, + 1, + 1, + 1, + 2, + 2, + 1, + 2, + 3, + 2, + 1, + 1, + 1, + 2, + 2, + 0, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 2, + 3, + 2, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 1, + 2, + 3, + 3, + 3, + 3, + 2, + 3, + 1, + 3, + 2, + 3, + 3, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 2, + 2, + 3, + 2, + 3, + 2, + 3, + 3, + 3, + 2, + 3, + 3, + 3, + 2, + 3, + 2, + 2, + }; labels_t expected = { 5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5, diff --git a/tests/Experiments.hpp b/tests/Experiments.hpp new file mode 100644 index 0000000..40fd6e8 --- /dev/null +++ b/tests/Experiments.hpp @@ -0,0 +1,139 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + +#ifndef EXPERIMENTS_HPP +#define EXPERIMENTS_HPP +#include +#include +#include +#include +#include +#include +#include "typesFImdlp.h" + +template +void show_vector(const std::vector& data, std::string title) +{ + std::cout << title << ": "; + std::string sep = ""; + for (const auto& d : data) { + std::cout << sep << d; + sep = ", "; + } + std::cout << std::endl; +} +enum class experiment_t { + RANGE, + VECTOR +}; +class Experiment { +public: + Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector data_discretized, std::vector cutpoints) : + from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::RANGE } + { + validate_strategy(); + + } + Experiment(std::vector dataset, int n_bins, 
std::string strategy, std::vector data_discretized, std::vector cutpoints) : + n_bins_{ n_bins }, strategy_{ strategy }, dataset_{ dataset }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::VECTOR } + { + validate_strategy(); + } + void validate_strategy() + { + if (strategy_ != "Q" && strategy_ != "U") { + throw std::invalid_argument("Invalid strategy " + strategy_); + } + } + float from_; + float to_; + float step_; + int n_bins_; + std::string strategy_; + std::vector dataset_; + std::vector discretized_data_; + std::vector cutpoints_; + experiment_t type_; +}; +class Experiments { +public: + Experiments(const std::string filename) : filename{ filename } + { + test_file.open(filename); + if (!test_file.is_open()) { + throw std::runtime_error("File " + filename + " not found"); + } + exp_end = false; + } + ~Experiments() + { + test_file.close(); + } + bool end() const + { + return exp_end; + } + bool is_next() + { + while (std::getline(test_file, line) && line[0] == '#'); + if (test_file.eof()) { + exp_end = true; + return false; + } + return true; + } + Experiment next() + { + return parse_experiment(line); + } +private: + std::tuple parse_header(const std::string& line) + { + std::istringstream iss(line); + std::string from_, to_, step_, n_bins, strategy; + iss >> from_ >> to_ >> step_ >> n_bins >> strategy; + return { std::stof(from_), std::stof(to_), std::stof(step_), std::stoi(n_bins), strategy }; + } + template + std::vector parse_vector(const std::string& line) + { + std::istringstream iss(line); + std::vector data; + std::string d; + while (iss >> d) { + data.push_back(std::is_same::value ? 
std::stof(d) : std::stoi(d)); + } + return data; + } + Experiment parse_experiment(std::string& line) + { + // Read experiment lines + std::string experiment, data, cuts, strategy; + std::getline(test_file, experiment); + std::getline(test_file, data); + std::getline(test_file, cuts); + // split data into variables + float from_, to_, step_; + int n_bins; + std::vector dataset; + auto data_discretized = parse_vector(data); + auto cutpoints = parse_vector(cuts); + if (line == "RANGE") { + tie(from_, to_, step_, n_bins, strategy) = parse_header(experiment); + return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints }; + } + strategy = experiment.substr(0, 1); + n_bins = std::stoi(experiment.substr(1, 1)); + data = experiment.substr(3, experiment.size() - 4); + dataset = parse_vector(data); + return Experiment(dataset, n_bins, strategy, data_discretized, cutpoints); + } + std::ifstream test_file; + std::string filename; + std::string line; + bool exp_end; +}; +#endif \ No newline at end of file diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index b439631..26ae424 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -1,9 +1,15 @@ -#include "gtest/gtest.h" -#include "../Metrics.h" -#include "../CPPFImdlp.h" +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include #include -#include "ArffFiles.h" +#include +#include "gtest/gtest.h" +#include "Metrics.h" +#include "CPPFImdlp.h" #define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \ try { \ @@ -124,7 +130,7 @@ namespace mdlp { { samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 }; labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 }; - cutPoints_t expected = { 1.5f, 2.5f }; + cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 }; fit(X_, y_); auto 
computed = getCutPoints(); EXPECT_EQ(computed.size(), expected.size()); @@ -167,29 +173,31 @@ namespace mdlp { y = { 1 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 0); + EXPECT_EQ(computed.size(), 2); X = { 1, 3 }; y = { 1, 2 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 0); + EXPECT_EQ(computed.size(), 2); X = { 2, 4 }; y = { 1, 2 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 0); + EXPECT_EQ(computed.size(), 2); X = { 1, 2, 3 }; y = { 1, 2, 2 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 1); - EXPECT_NEAR(computed[0], 1.5, precision); + EXPECT_EQ(computed.size(), 3); + EXPECT_NEAR(computed[0], 1, precision); + EXPECT_NEAR(computed[1], 1.5, precision); + EXPECT_NEAR(computed[2], 3, precision); } TEST_F(TestFImdlp, TestArtificialDataset) { fit(X, y); - cutPoints_t expected = { 5.05f }; + cutPoints_t expected = { 4.7, 5.05, 6.0 }; vector computed = getCutPoints(); EXPECT_EQ(computed.size(), expected.size()); for (unsigned long i = 0; i < computed.size(); i++) { @@ -200,10 +208,10 @@ namespace mdlp { TEST_F(TestFImdlp, TestIris) { vector expected = { - {5.45f, 5.75f}, - {2.75f, 2.85f, 2.95f, 3.05f, 3.35f}, - {2.45f, 4.75f, 5.05f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 5.05f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 3, 5, 4, 3 }; auto test = CPPFImdlp(); @@ -213,7 +221,7 @@ namespace mdlp { TEST_F(TestFImdlp, ComputeCutPointsGCase) { cutPoints_t expected; - expected = { 1.5 }; + expected = { 0, 1.5, 2 }; samples_t X_ = { 0, 1, 2, 2, 2 }; labels_t y_ = { 1, 1, 1, 2, 2 }; fit(X_, y_); @@ -247,10 +255,10 @@ namespace mdlp { // Set max_depth to 1 auto test = CPPFImdlp(3, 1, 0); vector expected = { - {5.45f}, - {3.35f}, - {2.45f}, - {0.8f} + {4.3, 5.45f, 7.9}, + {2, 3.35f, 4.4}, + {1, 2.45f, 6.9}, + {0.1, 0.8f, 2.5} }; vector depths = { 1, 1, 1, 1 }; test_dataset(test, "iris", expected, depths); 
@@ -261,10 +269,10 @@ namespace mdlp { auto test = CPPFImdlp(75, 100, 0); // Set min_length to 75 vector expected = { - {5.45f, 5.75f}, - {2.85f, 3.35f}, - {2.45f, 4.75f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.85f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 3, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -275,10 +283,10 @@ namespace mdlp { // Set min_length to 75 auto test = CPPFImdlp(75, 2, 0); vector expected = { - {5.45f, 5.75f}, - {2.85f, 3.35f}, - {2.45f, 4.75f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.85f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 2, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -289,10 +297,10 @@ namespace mdlp { // Set min_length to 75 auto test = CPPFImdlp(75, 2, 1); vector expected = { - {5.45f}, - {2.85f}, - {2.45f}, - {0.8f} + {4.3, 5.45f, 7.9}, + {2, 2.85f, 4.4}, + {1, 2.45f, 6.9}, + {0.1, 0.8f, 2.5} }; vector depths = { 2, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -304,10 +312,10 @@ namespace mdlp { // Set min_length to 75 auto test = CPPFImdlp(75, 2, 0.2f); vector expected = { - {5.45f, 5.75f}, - {2.85f, 3.35f}, - {2.45f, 4.75f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.85f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 2, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -327,7 +335,6 @@ namespace mdlp { computed = compute_max_num_cut_points(); ASSERT_EQ(expected, computed); } - } TEST_F(TestFImdlp, TransformTest) { @@ -345,15 +352,15 @@ namespace mdlp { vector& X = file.getX(); labels_t& y = file.getY(); fit(X[1], y); - // auto computed = transform(X[1]); - // EXPECT_EQ(computed.size(), expected.size()); - // for (unsigned long i = 0; i < computed.size(); i++) { - // EXPECT_EQ(computed[i], expected[i]); - // } - // auto computed_ft = fit_transform(X[1], y); - // EXPECT_EQ(computed_ft.size(), expected.size()); - // 
for (unsigned long i = 0; i < computed_ft.size(); i++) { - // EXPECT_EQ(computed_ft[i], expected[i]); - // } + auto computed = transform(X[1]); + EXPECT_EQ(computed.size(), expected.size()); + for (unsigned long i = 0; i < computed.size(); i++) { + EXPECT_EQ(computed[i], expected[i]); + } + auto computed_ft = fit_transform(X[1], y); + EXPECT_EQ(computed_ft.size(), expected.size()); + for (unsigned long i = 0; i < computed_ft.size(); i++) { + EXPECT_EQ(computed_ft[i], expected[i]); + } } } diff --git a/tests/Metrics_unittest.cpp b/tests/Metrics_unittest.cpp index 83f5cb8..40389fc 100644 --- a/tests/Metrics_unittest.cpp +++ b/tests/Metrics_unittest.cpp @@ -1,5 +1,11 @@ +// **************************************************************** +// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX - FileType: SOURCE +// SPDX - License - Identifier: MIT +// **************************************************************** + #include "gtest/gtest.h" -#include "../Metrics.h" +#include "Metrics.h" namespace mdlp { class TestMetrics : public Metrics, public testing::Test { diff --git a/tests/datasets/tests.txt b/tests/datasets/tests.txt new file mode 100644 index 0000000..3ebc4af --- /dev/null +++ b/tests/datasets/tests.txt @@ -0,0 +1,222 @@ +# +# from, to, step, #bins, Q/U +# discretized data +# cut points +# +# +# Range experiments +# +RANGE +0, 100, 1, 4, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 24.75, 49.5, 74.25, 99.0 +RANGE +0, 50, 1, 4, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 12.25, 24.5, 36.75, 49.0 +RANGE +0, 100, 1, 3, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 33.0, 66.0, 99.0 +RANGE +0, 50, 1, 3, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 16.33333, 32.66667, 49.0 +RANGE +0, 10, 1, 3, Q +0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +0.0, 3.0, 6.0, 9.0 +RANGE +0, 100, 1, 4, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 24.75, 49.5, 74.25, 99.0 +RANGE +0, 50, 1, 4, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 12.25, 24.5, 36.75, 49.0 +RANGE +0, 100, 1, 3, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 33.0, 66.0, 99.0 +RANGE +0, 50, 1, 3, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 16.33333, 32.66667, 49.0 +RANGE +0, 10, 1, 3, U +0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +0.0, 3.0, 6.0, 9.0 +RANGE +1, 10, 1, 3, Q +0, 0, 0, 1, 1, 1, 2, 2, 2 +1.0, 3.66667, 6.33333, 9.0 +RANGE +1, 10, 1, 3, U +0, 0, 0, 1, 1, 1, 2, 2, 2 +1.0, 3.66667, 6.33333, 9.0 +RANGE +1, 11, 1, 3, Q +0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.0, 7.0, 10.0 +RANGE +1, 11, 1, 3, U +0, 0, 0, 1, 
1, 1, 2, 2, 2, 2 +1.0, 4.0, 7.0, 10.0 +RANGE +1, 12, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.33333, 7.66667, 11.0 +RANGE +1, 12, 1, 3, U +0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.33333, 7.66667, 11.0 +RANGE +1, 13, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +RANGE +1, 13, 1, 3, U +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +RANGE +1, 14, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +RANGE +1, 14, 1, 3, U +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +RANGE +1, 15, 1, 3, Q +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +RANGE +1, 15, 1, 3, U +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +# +# Vector experiments +# +VECTOR +Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0] +1, 0, 0, 1, 0, 0, 1, 0, 0 +1.0, 1.66667, 3.0 +VECTOR +U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0] +2, 0, 0, 2, 0, 0, 2, 0, 0 +1.0, 1.66667, 2.33333, 3.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 
+1.0, 5.66667, 10.33333, 15.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] +2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] +2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] +0, 1, 1, 1, 1, 1, 2, 2, 2, 2 +0.0, 1.0, 3.0, 4.0 +VECTOR +U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] +0, 0, 0, 0, 1, 1, 2, 2, 2, 2 +0.0, 1.33333, 2.66667, 4.0 +# +# Vector experiments with iris +# +VECTOR +Q3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 
2, 2, 2, 2, 1, 1 +4.3, 5.4, 6.3, 7.9 +VECTOR +U3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 2, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1 +4.3, 5.5, 6.7, 7.9 +VECTOR +Q4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 
6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 +4.3, 5.1, 5.8, 6.4, 7.9 +VECTOR +U4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 +4.3, 5.2, 6.1, 7.0, 7.9 +VECTOR +Q3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 
3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 1, 0, 0, 2, 1, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1 +2.0, 2.9, 3.2, 4.4 +VECTOR +U3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 
1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 +2.0, 2.8, 3.6, 4.4 +VECTOR +Q4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +3, 2, 2, 2, 3, 3, 3, 3, 1, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 0, 2, 3, 3, 2, 3, 2, 3, 3, 2, 2, 2, 0, 1, 1, 3, 0, 1, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 0, 0, 2, 3, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 1, 0, 1, 3, 0, 2, 1, 2, 2, 0, 1, 0, 3, 2, 0, 2, 0, 1, 2, 2, 3, 0, 0, 2, 1, 1, 0, 3, 2, 1, 2, 1, 2, 1, 3, 1, 1, 0, 2, 3, 2, 2, 2, 2, 2, 0, 2, 3, 2, 0, 2, 3, 2 +2.0, 2.8, 3.0, 3.3, 4.4 +VECTOR +U4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 
2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +2, 1, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 3, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 3, 1, 3, 2, 2, 2, 2, 2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, 1, 3, 1, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 1 +2.0, 2.6, 3.2, 3.8, 4.4 +VECTOR +Q3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +1.0, 2.63333, 4.9, 6.9 +VECTOR +U3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 
1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +1.0, 2.96667, 4.93333, 6.9 +VECTOR +Q4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 
1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3 +1.0, 1.6, 4.35, 5.1, 6.9 +VECTOR +U4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 2, 2, 2, 2, 2 +1.0, 2.475, 3.95, 5.425, 6.9 +VECTOR +Q3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 
1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.1, 0.86667, 1.6, 2.5 +VECTOR +U3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.1, 0.9, 1.7, 2.5 +VECTOR +Q4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.1, 0.3, 1.3, 1.8, 2.5 +VECTOR +U4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 
1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2 +0.1, 0.7, 1.3, 1.9, 2.5 diff --git a/tests/lib/Files b/tests/lib/Files new file mode 160000 index 0000000..a531692 --- /dev/null +++ b/tests/lib/Files @@ -0,0 +1 @@ +Subproject commit a5316928d408266aa425f64131ab0f592b010a8d diff --git a/tests/test b/tests/test deleted file mode 100755 index 9888013..0000000 --- a/tests/test +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -if [ -d build ] && [ "$1" != "run" ]; then - rm -fr build -fi -if [ -d gcovr-report ] ; then - rm -fr gcovr-report -fi -cmake -S . -B build -Wno-dev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="--coverage" -DCMAKE_C_FLAGS="--coverage" -cmake --build build -cd build -ctest --output-on-failure -cd .. -mkdir gcovr-report -cd .. 
-gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --gcov-filter "Discretizer.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines diff --git a/tests/testKbins.py b/tests/testKbins.py deleted file mode 100644 index 5f8a671..0000000 --- a/tests/testKbins.py +++ /dev/null @@ -1,412 +0,0 @@ -from scipy.io.arff import loadarff -from sklearn.preprocessing import KBinsDiscretizer - - -def test(clf, X, expected, title): - X = [[x] for x in X] - clf.fit(X) - computed = [int(x[0]) for x in clf.transform(X)] - print(f"{title}") - print(f"{computed=}") - print(f"{expected=}") - assert computed == expected - print("-" * 80) - - -# Test Uniform Strategy -clf3u = KBinsDiscretizer( - n_bins=3, encode="ordinal", strategy="uniform", subsample=200_000 -) -clf3q = KBinsDiscretizer( - n_bins=3, encode="ordinal", strategy="quantile", subsample=200_000 -) -clf4u = KBinsDiscretizer( - n_bins=4, encode="ordinal", strategy="uniform", subsample=200_000 -) -clf4q = KBinsDiscretizer( - n_bins=4, encode="ordinal", strategy="quantile", subsample=200_000 -) -# -X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] -labels = [0, 0, 0, 1, 1, 1, 2, 2, 2] -test(clf3u, X, labels, title="Easy3BinsUniform") -test(clf3q, X, labels, title="Easy3BinsQuantile") -# -X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] -labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 2] -# En C++ se obtiene el mismo resultado en ambos, no como aquí -labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2] -test(clf3u, X, labels, title="X10BinsUniform") -test(clf3q, X, labels2, title="X10BinsQuantile") -# -X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0] -labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2] -# En C++ se obtiene el mismo resultado en ambos, no como aquí -# labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2] -test(clf3u, X, labels, title="X11BinsUniform") -test(clf3q, X, labels, title="X11BinsQuantile") -# -X = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] -labels = [0, 0, 0, 0, 0, 0] 
-test(clf3u, X, labels, title="ConstantUniform") -test(clf3q, X, labels, title="ConstantQuantile") -# -X = [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0] -labels = [2, 0, 0, 2, 0, 0, 2, 0, 0] -labels2 = [1, 0, 0, 1, 0, 0, 1, 0, 0] # igual que en C++ -test(clf3u, X, labels, title="EasyRepeatedUniform") -test(clf3q, X, labels2, title="EasyRepeatedQuantile") -# -X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] -labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] -test(clf4u, X, labels, title="Easy4BinsUniform") -test(clf4q, X, labels, title="Easy4BinsQuantile") -# -X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0] -labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3] -test(clf4u, X, labels, title="X13BinsUniform") -test(clf4q, X, labels, title="X13BinsQuantile") -# -X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0] -labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3] -test(clf4u, X, labels, title="X14BinsUniform") -test(clf4q, X, labels, title="X14BinsQuantile") -# -X1 = [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] -X2 = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] -labels1 = [3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0] -labels2 = [3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0] -test(clf4u, X1, labels1, title="X15BinsUniform") -test(clf4q, X2, labels2, title="X15BinsQuantile") -# -X = [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] -labels = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3] -test(clf4u, X, labels, title="RepeatedValuesUniform") -test(clf4q, X, labels, title="RepeatedValuesQuantile") - -print(f"Uniform {clf4u.bin_edges_=}") -print(f"Quaintile {clf4q.bin_edges_=}") -print("-" * 80) -# -data, meta = loadarff("tests/datasets/iris.arff") - -labelsu = [ - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 1, - 1, - 1, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 0, - 1, - 0, 
- 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 3, - 2, - 2, - 1, - 2, - 1, - 2, - 0, - 2, - 0, - 0, - 1, - 1, - 1, - 1, - 2, - 1, - 1, - 2, - 1, - 1, - 1, - 2, - 1, - 2, - 2, - 2, - 2, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 2, - 2, - 1, - 1, - 1, - 1, - 1, - 0, - 1, - 1, - 1, - 2, - 0, - 1, - 2, - 1, - 3, - 2, - 2, - 3, - 0, - 3, - 2, - 3, - 2, - 2, - 2, - 1, - 1, - 2, - 2, - 3, - 3, - 1, - 2, - 1, - 3, - 2, - 2, - 3, - 2, - 1, - 2, - 3, - 3, - 3, - 2, - 2, - 1, - 3, - 2, - 2, - 1, - 2, - 2, - 2, - 1, - 2, - 2, - 2, - 2, - 2, - 2, - 1, -] -labelsq = [ - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 2, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 0, - 1, - 0, - 0, - 0, - 1, - 1, - 0, - 0, - 1, - 1, - 1, - 0, - 0, - 1, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 3, - 3, - 3, - 1, - 3, - 1, - 2, - 0, - 3, - 1, - 0, - 2, - 2, - 2, - 1, - 3, - 1, - 2, - 2, - 1, - 2, - 2, - 2, - 2, - 3, - 3, - 3, - 3, - 2, - 1, - 1, - 1, - 2, - 2, - 1, - 2, - 3, - 2, - 1, - 1, - 1, - 2, - 2, - 0, - 1, - 1, - 1, - 2, - 1, - 1, - 2, - 2, - 3, - 2, - 3, - 3, - 0, - 3, - 3, - 3, - 3, - 3, - 3, - 1, - 2, - 3, - 3, - 3, - 3, - 2, - 3, - 1, - 3, - 2, - 3, - 3, - 2, - 2, - 3, - 3, - 3, - 3, - 3, - 2, - 2, - 3, - 2, - 3, - 2, - 3, - 3, - 3, - 2, - 3, - 3, - 3, - 2, - 3, - 2, - 2, -] -# test(clf4u, data["sepallength"], labelsu, title="IrisUniform") -# test(clf4q, data["sepallength"], labelsq, title="IrisQuantile") -sepallength = [[x] for x in data["sepallength"]] -clf4u.fit(sepallength) -clf4q.fit(sepallength) -computedu = clf4u.transform(sepallength) -computedq = clf4q.transform(sepallength) -wrongu = 0 -wrongq = 0 -for i in range(len(labelsu)): - if labelsu[i] != computedu[i]: - wrongu += 1 - if labelsq[i] != computedq[i]: - wrongq += 1 -print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Uniform ={wrongu:3d}") -print(f"Iris sepallength diff. 
between BinDisc & sklearn::KBins Quantile ={wrongq:3d}") diff --git a/tests/tests_do.py b/tests/tests_do.py new file mode 100644 index 0000000..357d073 --- /dev/null +++ b/tests/tests_do.py @@ -0,0 +1,71 @@ +# *************************************************************** +# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +# SPDX-FileType: SOURCE +# SPDX-License-Identifier: MIT +# *************************************************************** + +import json +from sklearn.preprocessing import KBinsDiscretizer + +with open("datasets/tests.txt") as f: + data = f.readlines() + +data = [x.strip() for x in data if x[0] != "#"] + +errors = False +for i in range(0, len(data), 4): + experiment_type = data[i] + print("Experiment:", data[i + 1]) + if experiment_type == "RANGE": + range_data = data[i + 1] + from_, to_, step_, n_bins_, strategy_ = range_data.split(",") + X = [[float(x)] for x in range(int(from_), int(to_), int(step_))] + else: + strategy_ = data[i + 1][0] + n_bins_ = data[i + 1][1] + vector = data[i + 1][2:] + X = [[float(x)] for x in json.loads(vector)] + + strategy = "quantile" if strategy_.strip() == "Q" else "uniform" + disc = KBinsDiscretizer( + n_bins=int(n_bins_), + encode="ordinal", + strategy=strategy, + ) + expected_data = data[i + 2] + cuts_data = data[i + 3] + disc.fit(X) + # + # Normalize the cutpoints to remove numerical errors such as 33.0000000001 + # instead of 33 + # + for j in range(len(disc.bin_edges_[0])): + disc.bin_edges_[0][j] = round(disc.bin_edges_[0][j], 5) + result = disc.transform(X) + result = [int(x) for x in result.flatten()] + expected = [int(x) for x in expected_data.split(",")] + # + # Check the Results + # + assert len(result) == len(expected) + for j in range(len(result)): + if result[j] != expected[j]: + print("* Error at", j, "Expected=", expected[j], "Result=", result[j]) + errors = True + expected_cuts = disc.bin_edges_[0] + computed_cuts = [float(x) for x in cuts_data.split(",")] + assert 
"""Generate ``datasets/tests.txt``, the reference data for the discretizer tests.

Every experiment is discretized with scikit-learn's ``KBinsDiscretizer`` and
written as a small record:

* ``RANGE`` records: a ``from, to, step, #bins, Q/U`` line, the discretized
  data and the cut points.
* ``VECTOR`` records: a ``<Q|U><#bins><values>`` line, the discretized data
  and the cut points.

``Q`` selects the ``quantile`` strategy and ``U`` the ``uniform`` one.
"""

import numpy as np

# Each entry is [from, to, step, n_bins, strategy].
experiments_range = [
    [0, 100, 1, 4, "Q"],
    [0, 50, 1, 4, "Q"],
    [0, 100, 1, 3, "Q"],
    [0, 50, 1, 3, "Q"],
    [0, 10, 1, 3, "Q"],
    [0, 100, 1, 4, "U"],
    [0, 50, 1, 4, "U"],
    [0, 100, 1, 3, "U"],
    [0, 50, 1, 3, "U"],
    #
    [0, 10, 1, 3, "U"],
    [1, 10, 1, 3, "Q"],
    [1, 10, 1, 3, "U"],
    [1, 11, 1, 3, "Q"],
    [1, 11, 1, 3, "U"],
    [1, 12, 1, 3, "Q"],
    [1, 12, 1, 3, "U"],
    [1, 13, 1, 3, "Q"],
    [1, 13, 1, 3, "U"],
    [1, 14, 1, 3, "Q"],
    [1, 14, 1, 3, "U"],
    [1, 15, 1, 3, "Q"],
    [1, 15, 1, 3, "U"],
]

# Each entry is (n_bins, values); every vector is run with both strategies.
experiments_vectors = [
    (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),
    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),
    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),
    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),
    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),
    (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),
    (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]),
]


def write_lists(file, data, cuts):
    """Write the discretized data and the cut points as two comma-separated lines.

    Args:
        file: writable text file object.
        data: discretized values; either a flat sequence or the ``(n, 1)``
            array returned by ``KBinsDiscretizer.transform``.
        cuts: cut points; each one is rounded to 5 decimals before writing.
    """
    # Flatten first: calling int() on a 1-element array row (ndim > 0) is
    # deprecated since NumPy 1.25, while int() on a scalar is always fine.
    labels = [f"{int(value):d}" for value in np.ravel(data)]
    file.write(", ".join(labels))
    file.write("\n")
    file.write(", ".join(str(round(cut, 5)) for cut in cuts))
    file.write("\n")


def normalize_cuts(cuts):
    """Round every cut point in place to 5 decimals.

    Removes numerical noise such as 33.00000000000001 instead of 33.

    Args:
        cuts: array holding one array of bin edges per feature (the layout of
            ``KBinsDiscretizer.bin_edges_``); it is modified in place.
    """
    for feature in range(cuts.shape[0]):
        edges = cuts[feature]
        for i in range(len(edges)):
            edges[i] = round(edges[i], 5)


def discretize(values, n_bins, strategy):
    """Fit a ``KBinsDiscretizer`` on ``values`` and discretize them.

    The cut points are normalized *before* transforming, so the labels are
    computed against the rounded edges.

    Args:
        values: iterable of numbers (a single feature).
        n_bins: number of bins requested.
        strategy: ``"Q"`` for quantile binning, anything else for uniform.

    Returns:
        Tuple ``(labels, cut_points)`` where ``labels`` is the ``(n, 1)`` array
        of ordinal bin indexes and ``cut_points`` the edges of the feature.
    """
    # Imported here so the pure helpers above stay usable without scikit-learn.
    from sklearn.preprocessing import KBinsDiscretizer

    disc = KBinsDiscretizer(
        n_bins=n_bins,
        encode="ordinal",
        strategy="quantile" if strategy.strip() == "Q" else "uniform",
    )
    data = [[x] for x in values]
    disc.fit(data)
    normalize_cuts(disc.bin_edges_)
    return disc.transform(data), disc.bin_edges_[0]


def write_section_title(file, title):
    """Write a three-line comment banner announcing a group of experiments."""
    file.write("#\n")
    file.write(f"# {title}\n")
    file.write("#\n")


def write_vector_experiment(file, strategy, n_bins, values):
    """Write one ``VECTOR`` record for ``values`` (a plain list of floats)."""
    file.write("VECTOR\n")
    file.write(f"{strategy}{n_bins}{values}\n")
    labels, cut_points = discretize(values, n_bins, strategy)
    write_lists(file, labels, cut_points)


def main():
    """Run every experiment and write the results to ``datasets/tests.txt``."""
    from sklearn.datasets import load_iris

    with open("datasets/tests.txt", "w") as file:
        file.write("#\n")
        file.write("# from, to, step, #bins, Q/U\n")
        file.write("# discretized data\n")
        file.write("# cut points\n")
        file.write("#\n")
        #
        # Range experiments
        #
        write_section_title(file, "Range experiments")
        for from_, to_, step_, bins_, strategy in experiments_range:
            file.write("RANGE\n")
            labels, cut_points = discretize(range(from_, to_, step_), bins_, strategy)
            file.write(f"{from_}, {to_}, {step_}, {bins_}, {strategy}\n")
            write_lists(file, labels, cut_points)
        #
        # Vector experiments
        #
        write_section_title(file, "Vector experiments")
        for n_bins, experiment in experiments_vectors:
            for strategy in ["Q", "U"]:
                write_vector_experiment(file, strategy, n_bins, experiment)
        #
        # Vector experiments iris
        #
        write_section_title(file, "Vector experiments with iris")
        X, y = load_iris(return_X_y=True)
        for i in range(X.shape[1]):
            for n_bins in [3, 4]:
                for strategy in ["Q", "U"]:
                    write_vector_experiment(file, strategy, n_bins, X[:, i].tolist())


# Also true inside a Jupyter kernel, so the notebook cell still runs the script.
if __name__ == "__main__":
    main()
# ***************************************************************
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: MIT
# ***************************************************************
"""Refresh the coverage badge of README.md from an lcov summary.

Usage: python update_coverage.py <build_dir>

``<build_dir>/coverage.info`` is summarized with ``lcov --summary``. When the
line coverage is at least 90% the README line that holds the shields.io
coverage badge is rewritten with the new value; otherwise nothing is written
and the script exits with status 1.
"""

import re
import subprocess
import sys

readme_file = "README.md"

# Matches the "  lines......: 98.5% (264 of 268 lines)" row of `lcov --summary`.
# Anchoring on the start of the line keeps unrelated rows that merely contain
# the word "lines" (e.g. a tracefile path such as /ci/pipelines/...) from matching.
LINES_ROW = re.compile(r"^\s*lines\.*:\s*(\d+(?:\.\d+)?)%", re.MULTILINE)


def parse_line_coverage(summary):
    """Return the line-coverage percentage found in an lcov summary.

    Args:
        summary: text printed by ``lcov --summary``.

    Returns:
        The percentage as a float, or 0.0 when the summary has no
        ``lines...: NN.N%`` row (for example ``no data found``).
    """
    match = LINES_ROW.search(summary)
    return float(match.group(1)) if match else 0.0


def badge_line(percentage):
    """Return the markdown line of the coverage badge for ``percentage``."""
    # The label is written with a decimal comma, as in the existing README badge.
    percentage_label = str(percentage).replace(".", ",")
    return f"[![Coverage Badge](https://img.shields.io/badge/Coverage-{percentage_label}%25-green)](html/index.html)"


def replace_badge(lines, coverage_line):
    """Return ``lines`` with every coverage-badge line replaced.

    Args:
        lines: README lines, each keeping its trailing newline.
        coverage_line: new badge line, without trailing newline.
    """
    return [
        coverage_line + "\n" if "img.shields.io/badge/Coverage" in line else line
        for line in lines
    ]


def main(argv):
    """Update the README badge; return the process exit status.

    Args:
        argv: command-line arguments; ``argv[1]`` is the directory that
            contains ``coverage.info``.

    Raises:
        subprocess.CalledProcessError: if ``lcov`` exits with an error.
    """
    if len(argv) < 2:
        print(f"Usage: {argv[0]} <build_dir>")
        return 1
    print("Updating coverage...")
    # Argument list instead of a shell string: the path is passed verbatim, so
    # spaces or shell metacharacters in it can neither break nor inject commands.
    output = subprocess.check_output(
        ["lcov", "--summary", argv[1] + "/coverage.info"]
    )
    percentage = parse_line_coverage(output.decode("utf-8"))
    print(f"Coverage: {percentage}%")
    if percentage < 90:
        print("⛔Coverage is less than 90%. I won't update the badge.")
        return 1
    coverage_line = badge_line(percentage)
    # Explicit UTF-8: README content must not depend on the platform default
    # encoding.
    with open(readme_file, "r", encoding="utf-8") as f:
        lines = f.readlines()
    with open(readme_file, "w", encoding="utf-8") as f:
        f.writelines(replace_badge(lines, coverage_line))
    print(f"✅Coverage updated with value: {percentage}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))