Compare commits


9 Commits

18 changed files with 664 additions and 43 deletions

View File

@@ -1,4 +1,5 @@
 {
     "sonarCloudOrganization": "rmontanana",
-    "projectKey": "rmontanana_BayesNet"
+    "projectKey": "rmontanana_BayesNet",
+    "region": "EU"
 }

.vscode/launch.json vendored
View File

@@ -16,7 +16,7 @@
"name": "test", "name": "test",
"program": "${workspaceFolder}/build_Debug/tests/TestBayesNet", "program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
"args": [ "args": [
"[XBAODE]" "Test Dataset Loading"
], ],
"cwd": "${workspaceFolder}/build_Debug/tests" "cwd": "${workspaceFolder}/build_Debug/tests"
}, },

View File

@@ -5,7 +5,24 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [1.2.2] - 2025-08-19
+
+### Fixed
+
+- Fixed an issue with local discretization that discretized all features, whether they were numeric or categorical.
+- Fixed testutils to return states for all features:
+  - An empty vector is now returned for numeric features.
+  - Categorical features now return their unique states.
+
+## [1.2.1] - 2025-07-19
+
+### Internal
+
+- Update Libtorch to version 2.7.1
+- Update libraries versions:
+  - mdlp: 2.1.1
+  - Folding: 1.1.2
+  - ArffFiles: 1.2.1
+
 ## [1.2.0] - 2025-07-08
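The 1.2.2 entries above rely on a simple convention for the `states` map: numeric features carry an empty vector, categorical features carry their enumerated states. A minimal, self-contained sketch of that convention (the map and feature names here are illustrative, not the library's API):

```cpp
// Illustrative sketch of the 1.2.2 states convention (not library code):
// numeric features map to an empty vector, categorical features to their states.
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
    std::map<std::string, std::vector<int>> states;
    states["sepal_length"] = {};                 // numeric: the discretizer will fill this in
    states["chest_pain_type"] = { 0, 1, 2, 3 };  // categorical: unique states
    for (const auto& [feature, st] : states) {
        if (st.empty())
            std::cout << feature << ": numeric (to be discretized)\n";
        else
            std::cout << feature << ": categorical with " << st.size() << " states\n";
    }
}
```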

View File

@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.27)
 project(bayesnet
-    VERSION 1.2.0
+    VERSION 1.2.2
     DESCRIPTION "Bayesian Network and basic classifiers Library."
     HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
     LANGUAGES CXX

View File

@@ -21,15 +21,18 @@ sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
 CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
 	|| nproc --all 2>/dev/null \
 	|| sysctl -n hw.ncpu)
+# --- Desired job count: CPUs-7, but never less than 1 --------------
 JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
+
+# Colors for output
+GREEN = \033[0;32m
+YELLOW = \033[1;33m
+RED = \033[0;31m
+NC = \033[0m # No Color
 
 define ClearTests
 	@for t in $(test_targets); do \
 		if [ -f $(f_debug)/tests/$$t ]; then \
-			echo ">>> Cleaning $$t..." ; \
+			echo ">>> Removing $$t..." ; \
 			rm -f $(f_debug)/tests/$$t ; \
 		fi ; \
 	done
@@ -48,6 +51,20 @@ define setup_target
@echo ">>> Done" @echo ">>> Done"
endef endef
define status_file_folder
@if [ -d $(1) ]; then \
st1="$(GREEN)"; \
else \
st1="$(RED)"; \
fi; \
if [ -f $(1)/libbayesnet.a ]; then \
st2="$(GREEN)"; \
else \
st2="$(RED)"; \
fi; \
printf " $(YELLOW)$(2):$(NC) $$st1 Folder $(NC) $$st2 Library $(NC)\n"
endef
setup: ## Install dependencies for tests and coverage setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \ @if [ "$(shell uname)" = "Darwin" ]; then \
brew install gcovr; \ brew install gcovr; \
@@ -61,12 +78,12 @@ setup: ## Install dependencies for tests and coverage
 clean: ## Clean the project
 	@echo ">>> Cleaning the project..."
-	@if test -f CMakeCache.txt ; then echo "- Deleting CMakeCache.txt"; rm -f CMakeCache.txt; fimake
+	@if test -f CMakeCache.txt ; then echo "- Deleting CMakeCache.txt"; rm -f CMakeCache.txt; fi
 	@for folder in $(f_release) $(f_debug) vpcpkg_installed install_test ; do \
 		if test -d "$$folder" ; then \
 			echo "- Deleting $$folder folder" ; \
 			rm -rf "$$folder"; \
 		fi; \
 	done
 	@$(MAKE) clean-test
 	@echo ">>> Done";
@@ -80,11 +97,12 @@ debug: ## Setup debug version using Conan
 release: ## Setup release version using Conan
 	@$(call setup_target,"Release","$(f_release)","ENABLE_TESTING=OFF")
 
-buildd: ## Build the debug targets
-	cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(JOBS)
+buildd: ## Build the debug && test targets
+	@cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(JOBS)
+	@cmake --build $(f_debug) -t $(test_targets) --parallel $(JOBS)
 
 buildr: ## Build the release targets
-	cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(JOBS)
+	@cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(JOBS)
# Install targets # Install targets
@@ -241,9 +259,24 @@ sample: ## Build sample with Conan
 	sample/build/bayesnet_sample $(fname) $(model)
 	@echo ">>> Done";
 
+info: ## Show project information
+	@version=$$(grep -A1 "project(bayesnet" CMakeLists.txt | grep "VERSION" | sed 's/.*VERSION \([0-9.]*\).*/\1/'); \
+	printf "$(GREEN)BayesNet Library: $(YELLOW)ver. $$version$(NC)\n"
+	@echo ""
+	@printf "$(GREEN)Project folders:$(NC)\n"
+	$(call status_file_folder, $(f_release), "Build\ Release")
+	$(call status_file_folder, $(f_debug), "Build\ Debug\ \ ")
+	@echo ""
+	@printf "$(GREEN)Build commands:$(NC)\n"
+	@printf "  $(YELLOW)make release && make buildr$(NC) - Build library for release\n"
+	@printf "  $(YELLOW)make debug && make buildd$(NC) - Build library for debug\n"
+	@printf "  $(YELLOW)make test$(NC) - Run tests\n"
+	@printf "  $(YELLOW)Usage:$(NC) make help\n"
+	@echo ""
+	@printf "  $(YELLOW)Parallel Jobs: $(GREEN)$(JOBS)$(NC)\n"
+
 # Help target
 # ===========
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \

View File

@@ -37,6 +37,7 @@ namespace bayesnet {
         className = className_;
         states = iterativeLocalDiscretization(y, static_cast<KDB*>(this), dataset, features, className, states_, smoothing);
         KDB::fit(dataset, features, className, states, smoothing);
+        fitted = true;
         return *this;
     }
     torch::Tensor KDBLd::predict(torch::Tensor& X)
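The added `fitted = true;` (repeated in SPODELd, TANLd and AODELd below) matters because the base classifier's predict path is normally guarded by that flag. A minimal sketch of the pattern, with invented names, assuming (not quoting) the shape of the bayesnet sources:

```cpp
// Sketch of the guard pattern the fix addresses (assumed shape, invented names):
// predict() refuses to run on a model whose fit() never set fitted = true.
#include <stdexcept>

class ClassifierSketch {
public:
    ClassifierSketch& fit() {
        // ... build the model ...
        fitted = true;  // without this line, every later predict() throws
        return *this;
    }
    int predict() const {
        if (!fitted) {
            throw std::logic_error("Classifier has not been fitted");
        }
        return 0;  // placeholder prediction
    }
private:
    bool fitted = false;
};
```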

View File

@@ -101,6 +101,9 @@ namespace bayesnet {
             auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
             auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
             discretizers[feature]->fit(xvf, yxv);
+            // Enable the discretizer at predict time: we now have a fitted discretizer
+            // for this feature, whether or not it was numeric to begin with
+            wasNumeric[index] = true;
         }
         if (upgrade) {
             // Discretize again X (only the affected indices) with the new fitted discretizers
@@ -118,17 +121,20 @@ namespace bayesnet {
         }
         return states;
     }
-    map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
+    map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states)
     {
         // Discretize the continuous input data and build pDataset (Classifier::dataset)
+        // states is expected to hold an empty vector for each numeric feature and a vector of states for each discretized feature
         int m = Xf.size(1);
         int n = Xf.size(0);
-        map<std::string, std::vector<int>> states;
         pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
         auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
         // discretize input data by feature(row)
         std::unique_ptr<mdlp::Discretizer> discretizer;
+        wasNumeric.resize(pFeatures.size());
         for (auto i = 0; i < pFeatures.size(); ++i) {
+            auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
+            auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
             if (discretizationType == discretization_t::BINQ) {
                 discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
             } else if (discretizationType == discretization_t::BINU) {
@@ -136,13 +142,19 @@ namespace bayesnet {
             } else { // Default is MDLP
                 discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
             }
-            auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
-            auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
-            discretizer->fit(Xt, yv);
-            pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
-            auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
-            iota(xStates.begin(), xStates.end(), 0);
-            states[pFeatures[i]] = xStates;
+            if (states[pFeatures[i]].empty()) {
+                // If the feature is numeric, we discretize it
+                pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->fit_transform(Xt, yv)));
+                int n_states = discretizer->getCutPoints().size() + 1;
+                auto xStates = std::vector<int>(n_states);
+                iota(xStates.begin(), xStates.end(), 0);
+                states[pFeatures[i]] = xStates;
+                wasNumeric[i] = true;
+            } else {
+                wasNumeric[i] = false;
+                // If the feature is categorical, we just copy it
+                pDataset.index_put_({ i, "..." }, Xf[i].to(torch::kInt32));
+            }
             discretizers[pFeatures[i]] = std::move(discretizer);
         }
         int n_classes = torch::max(y).item<int>() + 1;
@@ -157,8 +169,13 @@ namespace bayesnet {
         auto Xtd = torch::zeros_like(X, torch::kInt32);
         for (int i = 0; i < X.size(0); ++i) {
             auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
-            auto Xd = discretizers[pFeatures[i]]->transform(Xt);
-            Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
+            if (wasNumeric[i]) {
+                auto Xd = discretizers[pFeatures[i]]->transform(Xt);
+                Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
+            } else {
+                Xtd.index_put_({ i }, Xf[i].to(torch::kInt32));
+            }
         }
         return Xtd;
     }
@@ -190,7 +207,7 @@ namespace bayesnet {
     )
     {
         // Phase 1: Initial discretization (same as original)
-        auto currentStates = fit_local_discretization(y);
+        auto currentStates = fit_local_discretization(y, initialStates);
         auto previousModel = Network();
         if (convergence_params.verbose) {
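Taken together, these hunks make `fit_local_discretization` dispatch on the incoming states map: empty vector means numeric (discretize and publish states 0..n-1), non-empty means categorical (copy through). A standalone sketch of that dispatch, with invented names and hard-coded cut points standing in for what the mdlp discretizers would compute:

```cpp
#include <algorithm>
#include <map>
#include <numeric>
#include <string>
#include <vector>

// Label each value with the index of the first cut point above it,
// mimicking the shape of a discretizer's transform() output.
std::vector<int> discretizeColumn(const std::vector<float>& column,
                                  const std::vector<float>& cutPoints) {
    std::vector<int> labels;
    for (float v : column) {
        auto it = std::upper_bound(cutPoints.begin(), cutPoints.end(), v);
        labels.push_back(static_cast<int>(it - cutPoints.begin()));
    }
    return labels;
}

std::vector<int> processFeature(const std::string& name,
                                const std::vector<float>& column,
                                std::map<std::string, std::vector<int>>& states,
                                std::vector<bool>& wasNumeric, std::size_t i) {
    if (states[name].empty()) {
        // Numeric feature: discretize and publish the resulting states 0..n-1.
        wasNumeric[i] = true;
        std::vector<float> cuts = { 1.0f, 2.5f };  // stand-in for MDLP cut points
        states[name].resize(cuts.size() + 1);
        std::iota(states[name].begin(), states[name].end(), 0);
        return discretizeColumn(column, cuts);
    }
    // Categorical feature: values are already discrete labels, copy them through.
    wasNumeric[i] = false;
    return std::vector<int>(column.begin(), column.end());
}
```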

View File

@@ -23,9 +23,8 @@ namespace bayesnet {
     protected:
         void checkInput(const torch::Tensor& X, const torch::Tensor& y);
         torch::Tensor prepareX(torch::Tensor& X);
-        map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
-        map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
+        // fit_local_discretization is only called by AODELd
+        map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states);
         // Iterative discretization method
         template<typename Classifier>
         map<std::string, std::vector<int>> iterativeLocalDiscretization(
@@ -37,18 +36,15 @@ namespace bayesnet {
             const map<std::string, std::vector<int>>& initialStates,
             const Smoothing_t smoothing
         );
         torch::Tensor Xf; // X continuous nxm tensor
         torch::Tensor y; // y discrete nx1 tensor
         map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
         // MDLP parameters
         struct {
             size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
             float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
             int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
         } ld_params;
         // Convergence parameters
         struct {
             int maxIterations = 10;
@@ -60,10 +56,12 @@ namespace bayesnet {
"max_iterations", "verbose_convergence" "max_iterations", "verbose_convergence"
}; };
private: private:
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
std::vector<int> factorize(const std::vector<std::string>& labels_t); std::vector<int> factorize(const std::vector<std::string>& labels_t);
std::vector<std::string>& notes; // Notes during fit from BaseClassifier std::vector<std::string>& notes; // Notes during fit from BaseClassifier
torch::Tensor& pDataset; // (n+1)xm tensor torch::Tensor& pDataset; // (n+1)xm tensor
std::vector<std::string>& pFeatures; std::vector<std::string>& pFeatures;
std::vector<bool> wasNumeric;
std::string& pClassName; std::string& pClassName;
enum class discretization_t { enum class discretization_t {
MDLP, MDLP,

View File

@@ -36,6 +36,7 @@ namespace bayesnet {
         className = className_;
         states = iterativeLocalDiscretization(y, static_cast<SPODE*>(this), dataset, features, className, states_, smoothing);
         SPODE::fit(dataset, features, className, states, smoothing);
+        fitted = true;
         return *this;
     }
     torch::Tensor SPODELd::predict(torch::Tensor& X)

View File

@@ -35,6 +35,7 @@ namespace bayesnet {
         className = className_;
         states = iterativeLocalDiscretization(y, static_cast<TAN*>(this), dataset, features, className, states_, smoothing);
         TAN::fit(dataset, features, className, states, smoothing);
+        fitted = true;
         return *this;
     }
     torch::Tensor TANLd::predict(torch::Tensor& X)

View File

@@ -19,11 +19,12 @@ namespace bayesnet {
         Xf = X_;
         y = y_;
         // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
+        states = fit_local_discretization(y, states_);
         // We have discretized the input data
         // 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
         // calls buildModel to initialize the base models
         Ensemble::fit(dataset, features, className, states, smoothing);
+        fitted = true;
         return *this;
     }

View File

@@ -60,7 +60,7 @@ class BayesNetConan(ConanFile):
self.requires("libtorch/2.7.1") self.requires("libtorch/2.7.1")
self.requires("nlohmann_json/3.11.3") self.requires("nlohmann_json/3.11.3")
self.requires("folding/1.1.2") # Custom package self.requires("folding/1.1.2") # Custom package
self.requires("fimdlp/2.1.1") # Custom package self.requires("fimdlp/2.1.2") # Custom package
def build_requirements(self): def build_requirements(self):
self.build_requires("cmake/[>=3.27]") self.build_requires("cmake/[>=3.27]")

View File

@@ -20,7 +20,7 @@
#include "bayesnet/ensembles/AODELd.h" #include "bayesnet/ensembles/AODELd.h"
#include "bayesnet/ensembles/BoostAODE.h" #include "bayesnet/ensembles/BoostAODE.h"
const std::string ACTUAL_VERSION = "1.2.0"; const std::string ACTUAL_VERSION = "1.2.2";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{ {
@@ -496,3 +496,59 @@ TEST_CASE("Local discretization hyperparameters", "[Models]")
     REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
     REQUIRE(clft.getStatus() == bayesnet::NORMAL);
 }
+
+TEST_CASE("Test Dataset Loading", "[Datasets]")
+{
+    int max_sample = 4;
+    // Test loading a dataset
+    RawDatasets dataset("iris", true);
+    REQUIRE(dataset.Xt.size(0) == 4);
+    REQUIRE(dataset.Xt.size(1) == 150);
+    REQUIRE(dataset.yt.size(0) == 150);
+    std::cout << "Dataset iris discretized" << std::endl;
+    for (int sample = 0; sample < max_sample; sample++) {
+        for (int feature = 0; feature < 4; feature++) {
+            std::cout << dataset.Xt[feature][sample].item<int>() << " ";
+        }
+        std::cout << "| " << dataset.yt[sample].item<int>() << std::endl;
+    }
+    dataset = RawDatasets("iris", false);
+    std::cout << "Dataset iris raw" << std::endl;
+    for (int sample = 0; sample < max_sample; sample++) {
+        for (int feature = 0; feature < 4; feature++) {
+            std::cout << dataset.Xt[feature][sample].item<float>() << " ";
+        }
+        std::cout << "| " << dataset.yt[sample].item<int>() << std::endl;
+    }
+    // Test loading another dataset
+    dataset = RawDatasets("heart-statlog", true);
+    REQUIRE(dataset.Xt.size(0) == 13);
+    REQUIRE(dataset.Xt.size(1) == 270);
+    REQUIRE(dataset.yt.size(0) == 270);
+    std::cout << "Dataset heart-statlog discretized" << std::endl;
+    for (int sample = 0; sample < max_sample; sample++) {
+        for (int feature = 0; feature < 13; feature++) {
+            std::cout << dataset.Xt[feature][sample].item<int>() << " ";
+        }
+        std::cout << "| " << dataset.yt[sample].item<int>() << std::endl;
+    }
+    auto features = dataset.features;
+    std::cout << "States:" << std::endl;
+    for (int i = 0; i < 13; i++) {
+        std::cout << i << " has " << dataset.states.at(features[i]).size() << " states." << std::endl;
+    }
+    dataset = RawDatasets("heart-statlog", false);
+    std::cout << "Dataset heart-statlog raw" << std::endl;
+    for (int sample = 0; sample < max_sample; sample++) {
+        for (int feature = 0; feature < 13; feature++) {
+            std::cout << dataset.Xt[feature][sample].item<float>() << " ";
+        }
+        std::cout << "| " << dataset.yt[sample].item<int>() << std::endl;
+    }
+    std::cout << "States:" << std::endl;
+    for (int i = 0; i < 13; i++) {
+        std::cout << i << " has " << dataset.states.at(features[i]).size() << " states." << std::endl;
+    }
+    auto clf = bayesnet::TANLd();
+    clf.fit(dataset.Xt, dataset.yt, dataset.features, dataset.className, dataset.states, dataset.smoothing);
+    std::cout << "Score: " << clf.score(dataset.Xt, dataset.yt) << std::endl;
+}

View File

@@ -16,7 +16,7 @@
#include "TestUtils.h" #include "TestUtils.h"
std::map<std::string, std::string> modules = { std::map<std::string, std::string> modules = {
{ "mdlp", "2.1.1" }, { "mdlp", "2.1.2" },
{ "Folding", "1.1.2" }, { "Folding", "1.1.2" },
{ "json", "3.11" }, { "json", "3.11" },
{ "ArffFiles", "1.2.1" } { "ArffFiles", "1.2.1" }

View File

@@ -5,6 +5,7 @@
 // ***************************************************************
 #include <random>
+#include <nlohmann/json.hpp>
 #include "TestUtils.h"
 #include "bayesnet/config.h"
@@ -51,6 +52,7 @@ private:
 RawDatasets::RawDatasets(const std::string& file_name, bool discretize_, int num_samples_, bool shuffle_, bool class_last, bool debug)
 {
+    catalog = loadCatalog();
     num_samples = num_samples_;
     shuffle = shuffle_;
     discretize = discretize_;
@@ -62,7 +64,7 @@ RawDatasets::RawDatasets(const std::string& file_name, bool discretize_, int num
     nSamples = dataset.size(1);
     weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
     weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
-    classNumStates = discretize ? states.at(className).size() : 0;
+    classNumStates = states.at(className).size();
     auto fold = folding::StratifiedKFold(5, yt, 271);
     auto [train, test] = fold.getFold(0);
     auto train_t = torch::tensor(train);
@@ -78,18 +80,90 @@ RawDatasets::RawDatasets(const std::string& file_name, bool discretize_, int num
 map<std::string, int> RawDatasets::discretizeDataset(std::vector<mdlp::samples_t>& X)
 {
     map<std::string, int> maxes;
     auto fimdlp = mdlp::CPPFImdlp();
     for (int i = 0; i < X.size(); i++) {
-        fimdlp.fit(X[i], yv);
-        mdlp::labels_t& xd = fimdlp.transform(X[i]);
+        mdlp::labels_t xd;
+        if (is_numeric.at(i)) {
+            fimdlp.fit(X[i], yv);
+            xd = fimdlp.transform(X[i]);
+        } else {
+            std::transform(X[i].begin(), X[i].end(), back_inserter(xd), [](const auto& val) {
+                return static_cast<int>(val);
+            });
+        }
         maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
         Xv.push_back(xd);
     }
     return maxes;
 }
+
+map<std::string, std::vector<int>> RawDatasets::loadCatalog()
+{
+    map<std::string, std::vector<int>> catalogNames;
+    ifstream catalog(Paths::datasets() + "all.txt");
+    std::vector<int> numericFeaturesIdx;
+    if (!catalog.is_open()) {
+        throw std::invalid_argument("Unable to open catalog file. [" + Paths::datasets() + "all.txt" + "]");
+    }
+    std::string line;
+    std::vector<std::string> sorted_lines;
+    while (getline(catalog, line)) {
+        if (line.empty() || line[0] == '#') {
+            continue;
+        }
+        sorted_lines.push_back(line);
+    }
+    sort(sorted_lines.begin(), sorted_lines.end(), [](const auto& lhs, const auto& rhs) {
+        const auto result = mismatch(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend(), [](const auto& lhs, const auto& rhs) { return tolower(lhs) == tolower(rhs); });
+        return result.second != rhs.cend() && (result.first == lhs.cend() || tolower(*result.first) < tolower(*result.second));
+    });
+    for (const auto& line : sorted_lines) {
+        std::vector<std::string> tokens = split(line, ';');
+        std::string name = tokens[0];
+        std::string className;
+        numericFeaturesIdx.clear();
+        int size = tokens.size();
+        switch (size) {
+            case 1:
+                className = "-1";
+                numericFeaturesIdx.push_back(-1);
+                break;
+            case 2:
+                className = tokens[1];
+                numericFeaturesIdx.push_back(-1);
+                break;
+            case 3:
+            {
+                className = tokens[1];
+                auto numericFeatures = tokens[2];
+                if (numericFeatures == "all") {
+                    numericFeaturesIdx.push_back(-1);
+                } else {
+                    if (numericFeatures != "none") {
+                        auto features = nlohmann::json::parse(numericFeatures);
+                        for (auto& f : features) {
+                            numericFeaturesIdx.push_back(f);
+                        }
+                    }
+                }
+            }
+            break;
+            default:
+                throw std::invalid_argument("Invalid catalog file format.");
+        }
+        catalogNames[name] = numericFeaturesIdx;
+    }
+    catalog.close();
+    if (catalogNames.empty()) {
+        throw std::invalid_argument("Catalog is empty. Please check the catalog file.");
+    }
+    return catalogNames;
+}
+
 void RawDatasets::loadDataset(const std::string& name, bool class_last)
 {
     auto handler = ShuffleArffFiles(num_samples, shuffle);
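The switch in `loadCatalog` above handles three catalog line shapes. A hedged, self-contained sketch of the resulting mapping, with an invented `describe` helper used only for this example:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Illustrates the mapping loadCatalog() builds from tests/data/all.txt:
//   "iris;class; all"                     -> {-1}             (all features numeric)
//   "hayes-roth;class; none"              -> {}               (no numeric features)
//   "heart-statlog;class; [0,3,4,7,9,11]" -> {0,3,4,7,9,11}   (listed indices numeric)
std::string describe(const std::vector<int>& idx) {
    if (idx.empty()) return "no numeric features";
    if (idx.front() == -1) return "all features numeric";
    return std::to_string(idx.size()) + " numeric features";
}

int main() {
    std::cout << describe({ -1 }) << "\n";                 // iris
    std::cout << describe({}) << "\n";                     // hayes-roth
    std::cout << describe({ 0, 3, 4, 7, 9, 11 }) << "\n";  // heart-statlog
}
```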
@@ -101,6 +175,26 @@ void RawDatasets::loadDataset(const std::string& name, bool class_last)
     className = handler.getClassName();
     auto attributes = handler.getAttributes();
     transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
+    is_numeric.clear();
+    is_numeric.reserve(features.size());
+    auto numericFeaturesIdx = catalog.at(name);
+    if (numericFeaturesIdx.empty()) {
+        // no numeric features
+        is_numeric.assign(features.size(), false);
+    } else {
+        if (numericFeaturesIdx[0] == -1) {
+            // all features are numeric
+            is_numeric.assign(features.size(), true);
+        } else {
+            // some features are numeric
+            is_numeric.assign(features.size(), false);
+            for (const auto& idx : numericFeaturesIdx) {
+                if (idx >= 0 && idx < features.size()) {
+                    is_numeric[idx] = true;
+                }
+            }
+        }
+    }
     // Discretize Dataset
     auto maxValues = discretizeDataset(X);
     maxValues[className] = *max_element(yv.begin(), yv.end()) + 1;
@@ -113,13 +207,23 @@ void RawDatasets::loadDataset(const std::string& name, bool class_last)
Xt.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kInt32)); Xt.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kInt32));
} }
states[className] = std::vector<int>(maxValues[className]); states[className] = std::vector<int>(maxValues[className]);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else { } else {
Xt = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32); Xt = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(X[i])); Xt.index_put_({ i, "..." }, torch::tensor(X[i]));
if (!is_numeric.at(i)) {
states[features[i]] = std::vector<int>(maxValues[features[i]]);
iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
} else {
states[features[i]] = std::vector<int>();
}
} }
yt = torch::tensor(yv, torch::kInt32);
int maxy = *max_element(yv.begin(), yv.end()) + 1;
states[className] = std::vector<int>(maxy);
} }
iota(begin(states.at(className)), end(states.at(className)), 0);
yt = torch::tensor(yv, torch::kInt32); yt = torch::tensor(yv, torch::kInt32);
} }

View File

@@ -27,7 +27,11 @@ public:
     std::vector<double> weightsv;
     std::vector<string> features;
     std::string className;
+    std::vector<bool> is_numeric; // indicates whether each feature is numeric
     map<std::string, std::vector<int>> states;
+    // catalog maps each dataset name to the indices of its numeric features:
+    // {-1} means all features are numeric; an empty vector means none are
+    map<std::string, std::vector<int>> catalog;
     int nSamples, classNumStates;
     double epsilon = 1e-5;
     bool discretize;
@@ -65,8 +69,30 @@ private:
+ "classNumStates: " + std::to_string(classNumStates) + "\n" + "classNumStates: " + std::to_string(classNumStates) + "\n"
+ "states: " + states_ + "\n"; + "states: " + states_ + "\n";
} }
std::string trim(const std::string& str)
{
std::string result = str;
result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) {
return !std::isspace(ch);
}));
result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) {
return !std::isspace(ch);
}).base(), result.end());
return result;
}
std::vector<std::string> split(const std::string& text, char delimiter)
{
std::vector<std::string> result;
std::stringstream ss(text);
std::string token;
while (std::getline(ss, token, delimiter)) {
result.push_back(trim(token));
}
return result;
}
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X); map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X);
void loadDataset(const std::string& name, bool class_last); void loadDataset(const std::string& name, bool class_last);
map<std::string, std::vector<int>> loadCatalog();
}; };
#endif //TEST_UTILS_H #endif //TEST_UTILS_H

tests/data/all.txt Normal file
View File

@@ -0,0 +1,27 @@
adult;class;[0,2,4,10,11,12]
balance-scale;class; all
breast-w;Class; all
diabetes;class; all
ecoli;class; all
glass;Type; all
hayes-roth;class; none
heart-statlog;class; [0,3,4,7,9,11]
ionosphere;class; all
iris;class; all
kdd_JapaneseVowels;speaker; all
letter;class; all
liver-disorders;selector; all
mfeat-factors;class; all
mfeat-fourier;class; all
mfeat-karhunen;class; all
mfeat-morphological;class; all
mfeat-zernike;class; all
optdigits;class; all
page-blocks;class; all
pendigits;class; all
segment;class; all
sonar;Class; all
spambase;class; all
vehicle;Class; all
waveform-5000;class; all
wine;class; all

tests/data/heart-statlog.arff Executable file
View File

@@ -0,0 +1,338 @@
% This database contains 13 attributes (which have been extracted from
% a larger set of 75)
%
%
%
% Attribute Information:
% ------------------------
% 0 -- 1. age
% 1 -- 2. sex
% 2 -- 3. chest pain type (4 values)
% 3 -- 4. resting blood pressure
% 4 -- 5. serum cholestoral in mg/dl
% 5 -- 6. fasting blood sugar > 120 mg/dl
% 6 -- 7. resting electrocardiographic results (values 0,1,2)
% 7 -- 8. maximum heart rate achieved
% 8 -- 9. exercise induced angina
% 9 -- 10. oldpeak = ST depression induced by exercise relative to rest
% 10 -- 11. the slope of the peak exercise ST segment
% 11 -- 12. number of major vessels (0-3) colored by flourosopy
% 12 -- 13. thal: 3 = normal; 6 = fixed defect; 7 = reversable defect
%
% Attributes types
% -----------------
%
% Real: 1,4,5,8,10,12
% Ordered:11,
% Binary: 2,6,9
% Nominal:7,3,13
%
% Variable to be predicted
% ------------------------
% Absence (1) or presence (2) of heart disease
%
% Cost Matrix
%
% abse pres
% absence 0 1
% presence 5 0
%
% where the rows represent the true values and the columns the predicted.
%
% No missing values.
%
% 270 observations
%
%
%
%
% Relabeled values in attribute class
% From: 1 To: absent
% From: 2 To: present
%
@relation heart-statlog
@attribute age real
@attribute sex real
@attribute chest real
@attribute resting_blood_pressure real
@attribute serum_cholestoral real
@attribute fasting_blood_sugar real
@attribute resting_electrocardiographic_results real
@attribute maximum_heart_rate_achieved real
@attribute exercise_induced_angina real
@attribute oldpeak real
@attribute slope real
@attribute number_of_major_vessels real
@attribute thal real
@attribute class { absent, present}
@data
70,1,4,130,322,0,2,109,0,2.4,2,3,3,present
67,0,3,115,564,0,2,160,0,1.6,2,0,7,absent
57,1,2,124,261,0,0,141,0,0.3,1,0,7,present
64,1,4,128,263,0,0,105,1,0.2,2,1,7,absent
74,0,2,120,269,0,2,121,1,0.2,1,1,3,absent
65,1,4,120,177,0,0,140,0,0.4,1,0,7,absent
56,1,3,130,256,1,2,142,1,0.6,2,1,6,present
59,1,4,110,239,0,2,142,1,1.2,2,1,7,present
60,1,4,140,293,0,2,170,0,1.2,2,2,7,present
63,0,4,150,407,0,2,154,0,4,2,3,7,present
59,1,4,135,234,0,0,161,0,0.5,2,0,7,absent
53,1,4,142,226,0,2,111,1,0,1,0,7,absent
44,1,3,140,235,0,2,180,0,0,1,0,3,absent
61,1,1,134,234,0,0,145,0,2.6,2,2,3,present
57,0,4,128,303,0,2,159,0,0,1,1,3,absent
71,0,4,112,149,0,0,125,0,1.6,2,0,3,absent
46,1,4,140,311,0,0,120,1,1.8,2,2,7,present
53,1,4,140,203,1,2,155,1,3.1,3,0,7,present
64,1,1,110,211,0,2,144,1,1.8,2,0,3,absent
40,1,1,140,199,0,0,178,1,1.4,1,0,7,absent
67,1,4,120,229,0,2,129,1,2.6,2,2,7,present
48,1,2,130,245,0,2,180,0,0.2,2,0,3,absent
43,1,4,115,303,0,0,181,0,1.2,2,0,3,absent
47,1,4,112,204,0,0,143,0,0.1,1,0,3,absent
54,0,2,132,288,1,2,159,1,0,1,1,3,absent
48,0,3,130,275,0,0,139,0,0.2,1,0,3,absent
46,0,4,138,243,0,2,152,1,0,2,0,3,absent
51,0,3,120,295,0,2,157,0,0.6,1,0,3,absent
58,1,3,112,230,0,2,165,0,2.5,2,1,7,present
71,0,3,110,265,1,2,130,0,0,1,1,3,absent
57,1,3,128,229,0,2,150,0,0.4,2,1,7,present
66,1,4,160,228,0,2,138,0,2.3,1,0,6,absent
37,0,3,120,215,0,0,170,0,0,1,0,3,absent
59,1,4,170,326,0,2,140,1,3.4,3,0,7,present
50,1,4,144,200,0,2,126,1,0.9,2,0,7,present
48,1,4,130,256,1,2,150,1,0,1,2,7,present
61,1,4,140,207,0,2,138,1,1.9,1,1,7,present
59,1,1,160,273,0,2,125,0,0,1,0,3,present
42,1,3,130,180,0,0,150,0,0,1,0,3,absent
48,1,4,122,222,0,2,186,0,0,1,0,3,absent
40,1,4,152,223,0,0,181,0,0,1,0,7,present
62,0,4,124,209,0,0,163,0,0,1,0,3,absent
44,1,3,130,233,0,0,179,1,0.4,1,0,3,absent
46,1,2,101,197,1,0,156,0,0,1,0,7,absent
59,1,3,126,218,1,0,134,0,2.2,2,1,6,present
58,1,3,140,211,1,2,165,0,0,1,0,3,absent
49,1,3,118,149,0,2,126,0,0.8,1,3,3,present
44,1,4,110,197,0,2,177,0,0,1,1,3,present
66,1,2,160,246,0,0,120,1,0,2,3,6,present
65,0,4,150,225,0,2,114,0,1,2,3,7,present
42,1,4,136,315,0,0,125,1,1.8,2,0,6,present
52,1,2,128,205,1,0,184,0,0,1,0,3,absent
65,0,3,140,417,1,2,157,0,0.8,1,1,3,absent
63,0,2,140,195,0,0,179,0,0,1,2,3,absent
45,0,2,130,234,0,2,175,0,0.6,2,0,3,absent
41,0,2,105,198,0,0,168,0,0,1,1,3,absent
61,1,4,138,166,0,2,125,1,3.6,2,1,3,present
60,0,3,120,178,1,0,96,0,0,1,0,3,absent
59,0,4,174,249,0,0,143,1,0,2,0,3,present
62,1,2,120,281,0,2,103,0,1.4,2,1,7,present
57,1,3,150,126,1,0,173,0,0.2,1,1,7,absent
51,0,4,130,305,0,0,142,1,1.2,2,0,7,present
44,1,3,120,226,0,0,169,0,0,1,0,3,absent
60,0,1,150,240,0,0,171,0,0.9,1,0,3,absent
63,1,1,145,233,1,2,150,0,2.3,3,0,6,absent
57,1,4,150,276,0,2,112,1,0.6,2,1,6,present
51,1,4,140,261,0,2,186,1,0,1,0,3,absent
58,0,2,136,319,1,2,152,0,0,1,2,3,present
44,0,3,118,242,0,0,149,0,0.3,2,1,3,absent
47,1,3,108,243,0,0,152,0,0,1,0,3,present
61,1,4,120,260,0,0,140,1,3.6,2,1,7,present
57,0,4,120,354,0,0,163,1,0.6,1,0,3,absent
70,1,2,156,245,0,2,143,0,0,1,0,3,absent
76,0,3,140,197,0,1,116,0,1.1,2,0,3,absent
67,0,4,106,223,0,0,142,0,0.3,1,2,3,absent
45,1,4,142,309,0,2,147,1,0,2,3,7,present
45,1,4,104,208,0,2,148,1,3,2,0,3,absent
39,0,3,94,199,0,0,179,0,0,1,0,3,absent
42,0,3,120,209,0,0,173,0,0,2,0,3,absent
56,1,2,120,236,0,0,178,0,0.8,1,0,3,absent
58,1,4,146,218,0,0,105,0,2,2,1,7,present
35,1,4,120,198,0,0,130,1,1.6,2,0,7,present
58,1,4,150,270,0,2,111,1,0.8,1,0,7,present
41,1,3,130,214,0,2,168,0,2,2,0,3,absent
57,1,4,110,201,0,0,126,1,1.5,2,0,6,absent
42,1,1,148,244,0,2,178,0,0.8,1,2,3,absent
62,1,2,128,208,1,2,140,0,0,1,0,3,absent
59,1,1,178,270,0,2,145,0,4.2,3,0,7,absent
41,0,2,126,306,0,0,163,0,0,1,0,3,absent
50,1,4,150,243,0,2,128,0,2.6,2,0,7,present
59,1,2,140,221,0,0,164,1,0,1,0,3,absent
61,0,4,130,330,0,2,169,0,0,1,0,3,present
54,1,4,124,266,0,2,109,1,2.2,2,1,7,present
54,1,4,110,206,0,2,108,1,0,2,1,3,present
52,1,4,125,212,0,0,168,0,1,1,2,7,present
47,1,4,110,275,0,2,118,1,1,2,1,3,present
66,1,4,120,302,0,2,151,0,0.4,2,0,3,absent
58,1,4,100,234,0,0,156,0,0.1,1,1,7,present
64,0,3,140,313,0,0,133,0,0.2,1,0,7,absent
50,0,2,120,244,0,0,162,0,1.1,1,0,3,absent
44,0,3,108,141,0,0,175,0,0.6,2,0,3,absent
67,1,4,120,237,0,0,71,0,1,2,0,3,present
49,0,4,130,269,0,0,163,0,0,1,0,3,absent
57,1,4,165,289,1,2,124,0,1,2,3,7,present
63,1,4,130,254,0,2,147,0,1.4,2,1,7,present
48,1,4,124,274,0,2,166,0,0.5,2,0,7,present
51,1,3,100,222,0,0,143,1,1.2,2,0,3,absent
60,0,4,150,258,0,2,157,0,2.6,2,2,7,present
59,1,4,140,177,0,0,162,1,0,1,1,7,present
45,0,2,112,160,0,0,138,0,0,2,0,3,absent
55,0,4,180,327,0,1,117,1,3.4,2,0,3,present
41,1,2,110,235,0,0,153,0,0,1,0,3,absent
60,0,4,158,305,0,2,161,0,0,1,0,3,present
54,0,3,135,304,1,0,170,0,0,1,0,3,absent
42,1,2,120,295,0,0,162,0,0,1,0,3,absent
49,0,2,134,271,0,0,162,0,0,2,0,3,absent
46,1,4,120,249,0,2,144,0,0.8,1,0,7,present
56,0,4,200,288,1,2,133,1,4,3,2,7,present
66,0,1,150,226,0,0,114,0,2.6,3,0,3,absent
56,1,4,130,283,1,2,103,1,1.6,3,0,7,present
49,1,3,120,188,0,0,139,0,2,2,3,7,present
54,1,4,122,286,0,2,116,1,3.2,2,2,3,present
57,1,4,152,274,0,0,88,1,1.2,2,1,7,present
65,0,3,160,360,0,2,151,0,0.8,1,0,3,absent
54,1,3,125,273,0,2,152,0,0.5,3,1,3,absent
54,0,3,160,201,0,0,163,0,0,1,1,3,absent
62,1,4,120,267,0,0,99,1,1.8,2,2,7,present
52,0,3,136,196,0,2,169,0,0.1,2,0,3,absent
52,1,2,134,201,0,0,158,0,0.8,1,1,3,absent
60,1,4,117,230,1,0,160,1,1.4,1,2,7,present
63,0,4,108,269,0,0,169,1,1.8,2,2,3,present
66,1,4,112,212,0,2,132,1,0.1,1,1,3,present
42,1,4,140,226,0,0,178,0,0,1,0,3,absent
64,1,4,120,246,0,2,96,1,2.2,3,1,3,present
54,1,3,150,232,0,2,165,0,1.6,1,0,7,absent
46,0,3,142,177,0,2,160,1,1.4,3,0,3,absent
67,0,3,152,277,0,0,172,0,0,1,1,3,absent
56,1,4,125,249,1,2,144,1,1.2,2,1,3,present
34,0,2,118,210,0,0,192,0,0.7,1,0,3,absent
57,1,4,132,207,0,0,168,1,0,1,0,7,absent
64,1,4,145,212,0,2,132,0,2,2,2,6,present
59,1,4,138,271,0,2,182,0,0,1,0,3,absent
50,1,3,140,233,0,0,163,0,0.6,2,1,7,present
51,1,1,125,213,0,2,125,1,1.4,1,1,3,absent
54,1,2,192,283,0,2,195,0,0,1,1,7,present
53,1,4,123,282,0,0,95,1,2,2,2,7,present
52,1,4,112,230,0,0,160,0,0,1,1,3,present
40,1,4,110,167,0,2,114,1,2,2,0,7,present
58,1,3,132,224,0,2,173,0,3.2,1,2,7,present
41,0,3,112,268,0,2,172,1,0,1,0,3,absent
41,1,3,112,250,0,0,179,0,0,1,0,3,absent
50,0,3,120,219,0,0,158,0,1.6,2,0,3,absent
54,0,3,108,267,0,2,167,0,0,1,0,3,absent
64,0,4,130,303,0,0,122,0,2,2,2,3,absent
51,0,3,130,256,0,2,149,0,0.5,1,0,3,absent
46,0,2,105,204,0,0,172,0,0,1,0,3,absent
55,1,4,140,217,0,0,111,1,5.6,3,0,7,present
45,1,2,128,308,0,2,170,0,0,1,0,3,absent
56,1,1,120,193,0,2,162,0,1.9,2,0,7,absent
66,0,4,178,228,1,0,165,1,1,2,2,7,present
38,1,1,120,231,0,0,182,1,3.8,2,0,7,present
62,0,4,150,244,0,0,154,1,1.4,2,0,3,present
55,1,2,130,262,0,0,155,0,0,1,0,3,absent
58,1,4,128,259,0,2,130,1,3,2,2,7,present
43,1,4,110,211,0,0,161,0,0,1,0,7,absent
64,0,4,180,325,0,0,154,1,0,1,0,3,absent
50,0,4,110,254,0,2,159,0,0,1,0,3,absent
53,1,3,130,197,1,2,152,0,1.2,3,0,3,absent
45,0,4,138,236,0,2,152,1,0.2,2,0,3,absent
65,1,1,138,282,1,2,174,0,1.4,2,1,3,present
69,1,1,160,234,1,2,131,0,0.1,2,1,3,absent
69,1,3,140,254,0,2,146,0,2,2,3,7,present
67,1,4,100,299,0,2,125,1,0.9,2,2,3,present
68,0,3,120,211,0,2,115,0,1.5,2,0,3,absent
34,1,1,118,182,0,2,174,0,0,1,0,3,absent
62,0,4,138,294,1,0,106,0,1.9,2,3,3,present
51,1,4,140,298,0,0,122,1,4.2,2,3,7,present
46,1,3,150,231,0,0,147,0,3.6,2,0,3,present
67,1,4,125,254,1,0,163,0,0.2,2,2,7,present
50,1,3,129,196,0,0,163,0,0,1,0,3,absent
42,1,3,120,240,1,0,194,0,0.8,3,0,7,absent
56,0,4,134,409,0,2,150,1,1.9,2,2,7,present
41,1,4,110,172,0,2,158,0,0,1,0,7,present
42,0,4,102,265,0,2,122,0,0.6,2,0,3,absent
53,1,3,130,246,1,2,173,0,0,1,3,3,absent
43,1,3,130,315,0,0,162,0,1.9,1,1,3,absent
56,1,4,132,184,0,2,105,1,2.1,2,1,6,present
52,1,4,108,233,1,0,147,0,0.1,1,3,7,absent
62,0,4,140,394,0,2,157,0,1.2,2,0,3,absent
70,1,3,160,269,0,0,112,1,2.9,2,1,7,present
54,1,4,140,239,0,0,160,0,1.2,1,0,3,absent
70,1,4,145,174,0,0,125,1,2.6,3,0,7,present
54,1,2,108,309,0,0,156,0,0,1,0,7,absent
35,1,4,126,282,0,2,156,1,0,1,0,7,present
48,1,3,124,255,1,0,175,0,0,1,2,3,absent
55,0,2,135,250,0,2,161,0,1.4,2,0,3,absent
58,0,4,100,248,0,2,122,0,1,2,0,3,absent
54,0,3,110,214,0,0,158,0,1.6,2,0,3,absent
69,0,1,140,239,0,0,151,0,1.8,1,2,3,absent
77,1,4,125,304,0,2,162,1,0,1,3,3,present
68,1,3,118,277,0,0,151,0,1,1,1,7,absent
58,1,4,125,300,0,2,171,0,0,1,2,7,present
60,1,4,125,258,0,2,141,1,2.8,2,1,7,present
51,1,4,140,299,0,0,173,1,1.6,1,0,7,present
55,1,4,160,289,0,2,145,1,0.8,2,1,7,present
52,1,1,152,298,1,0,178,0,1.2,2,0,7,absent
60,0,3,102,318,0,0,160,0,0,1,1,3,absent
58,1,3,105,240,0,2,154,1,0.6,2,0,7,absent
64,1,3,125,309,0,0,131,1,1.8,2,0,7,present
37,1,3,130,250,0,0,187,0,3.5,3,0,3,absent
59,1,1,170,288,0,2,159,0,0.2,2,0,7,present
51,1,3,125,245,1,2,166,0,2.4,2,0,3,absent
43,0,3,122,213,0,0,165,0,0.2,2,0,3,absent
58,1,4,128,216,0,2,131,1,2.2,2,3,7,present
29,1,2,130,204,0,2,202,0,0,1,0,3,absent
41,0,2,130,204,0,2,172,0,1.4,1,0,3,absent
63,0,3,135,252,0,2,172,0,0,1,0,3,absent
51,1,3,94,227,0,0,154,1,0,1,1,7,absent
54,1,3,120,258,0,2,147,0,0.4,2,0,7,absent
44,1,2,120,220,0,0,170,0,0,1,0,3,absent
54,1,4,110,239,0,0,126,1,2.8,2,1,7,present
65,1,4,135,254,0,2,127,0,2.8,2,1,7,present
57,1,3,150,168,0,0,174,0,1.6,1,0,3,absent
63,1,4,130,330,1,2,132,1,1.8,1,3,7,present
35,0,4,138,183,0,0,182,0,1.4,1,0,3,absent
41,1,2,135,203,0,0,132,0,0,2,0,6,absent
62,0,3,130,263,0,0,97,0,1.2,2,1,7,present
43,0,4,132,341,1,2,136,1,3,2,0,7,present
58,0,1,150,283,1,2,162,0,1,1,0,3,absent
52,1,1,118,186,0,2,190,0,0,2,0,6,absent
61,0,4,145,307,0,2,146,1,1,2,0,7,present
39,1,4,118,219,0,0,140,0,1.2,2,0,7,present
45,1,4,115,260,0,2,185,0,0,1,0,3,absent
52,1,4,128,255,0,0,161,1,0,1,1,7,present
62,1,3,130,231,0,0,146,0,1.8,2,3,7,absent
62,0,4,160,164,0,2,145,0,6.2,3,3,7,present
53,0,4,138,234,0,2,160,0,0,1,0,3,absent
43,1,4,120,177,0,2,120,1,2.5,2,0,7,present
47,1,3,138,257,0,2,156,0,0,1,0,3,absent
52,1,2,120,325,0,0,172,0,0.2,1,0,3,absent
68,1,3,180,274,1,2,150,1,1.6,2,0,7,present
39,1,3,140,321,0,2,182,0,0,1,0,3,absent
53,0,4,130,264,0,2,143,0,0.4,2,0,3,absent
62,0,4,140,268,0,2,160,0,3.6,3,2,3,present
51,0,3,140,308,0,2,142,0,1.5,1,1,3,absent
60,1,4,130,253,0,0,144,1,1.4,1,1,7,present
65,1,4,110,248,0,2,158,0,0.6,1,2,6,present
65,0,3,155,269,0,0,148,0,0.8,1,0,3,absent
60,1,3,140,185,0,2,155,0,3,2,0,3,present
60,1,4,145,282,0,2,142,1,2.8,2,2,7,present
54,1,4,120,188,0,0,113,0,1.4,2,1,7,present
44,1,2,130,219,0,2,188,0,0,1,0,3,absent
44,1,4,112,290,0,2,153,0,0,1,1,3,present
51,1,3,110,175,0,0,123,0,0.6,1,0,3,absent
59,1,3,150,212,1,0,157,0,1.6,1,0,3,absent
71,0,2,160,302,0,0,162,0,0.4,1,2,3,absent
61,1,3,150,243,1,0,137,1,1,2,0,3,absent
55,1,4,132,353,0,0,132,1,1.2,2,1,7,present
64,1,3,140,335,0,0,158,0,0,1,0,3,present
43,1,4,150,247,0,0,171,0,1.5,1,0,3,absent
58,0,3,120,340,0,0,172,0,0,1,0,3,absent
60,1,4,130,206,0,2,132,1,2.4,2,2,7,present
58,1,2,120,284,0,2,160,0,1.8,2,0,3,present
49,1,2,130,266,0,0,171,0,0.6,1,0,3,absent
48,1,2,110,229,0,0,168,0,1,3,0,7,present
52,1,3,172,199,1,0,162,0,0.5,1,0,7,absent
44,1,2,120,263,0,0,173,0,0,1,0,7,absent
56,0,2,140,294,0,2,153,0,1.3,2,0,3,absent
57,1,4,140,192,0,0,148,0,0.4,2,0,6,absent
67,1,4,160,286,0,2,108,1,1.5,2,3,3,present