commit inicial

This commit is contained in:
2025-04-30 11:11:49 +02:00
commit e144d65e11
121 changed files with 53649 additions and 0 deletions

33
tests/CMakeLists.txt Normal file
View File

@@ -0,0 +1,33 @@
if(ENABLE_TESTING)
include_directories(
${BayesNet_SOURCE_DIR}/tests/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/mdlp/src
${BayesNet_SOURCE_DIR}/lib/log
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}
${CMAKE_BINARY_DIR}/configured_files/include
)
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
add_test(NAME XSPODE COMMAND TestBayesNet "[XSPODE]")
add_test(NAME XSPnDE COMMAND TestBayesNet "[XSPnDE]")
add_test(NAME XBAODE COMMAND TestBayesNet "[XBAODE]")
add_test(NAME XBA2DE COMMAND TestBayesNet "[XBA2DE]")
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
add_test(NAME Models COMMAND TestBayesNet "[Models]")
add_test(NAME Modules COMMAND TestBayesNet "[Modules]")
add_test(NAME Network COMMAND TestBayesNet "[Network]")
add_test(NAME Node COMMAND TestBayesNet "[Node]")
add_test(NAME MST COMMAND TestBayesNet "[MST]")
endif(ENABLE_TESTING)

49
tests/TestA2DE.cc Normal file
View File

@@ -0,0 +1,49 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/ensembles/A2DE.h"
#include "TestUtils.h"
TEST_CASE("Fit and Score", "[A2DE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::A2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon));
REQUIRE(clf.getNumberOfNodes() == 360);
REQUIRE(clf.getNumberOfEdges() == 756);
REQUIRE(clf.getNotes().size() == 0);
}
TEST_CASE("Test score with predict_voting", "[A2DE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::A2DE(true);
auto hyperparameters = nlohmann::json{
{"predict_voting", true},
};
clf.setHyperparameters(hyperparameters);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon));
hyperparameters["predict_voting"] = false;
clf.setHyperparameters(hyperparameters);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon));
}
TEST_CASE("Test graph", "[A2DE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::A2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = clf.graph();
REQUIRE(graph.size() == 78);
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet A2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
}

View File

@@ -0,0 +1,125 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <string>
#include "TestUtils.h"
#include "bayesnet/classifiers/TAN.h"
#include "bayesnet/classifiers/KDB.h"
#include "bayesnet/classifiers/KDBLd.h"
TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
raw.yv.pop_back();
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
}
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
auto yshort = torch::zeros({ 149 }, torch::kInt32);
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
}
TEST_CASE("Invalid data type", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", false);
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "dataset (X, y) must be of type Integer");
}
TEST_CASE("Invalid number of features", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "Classifier: X 5 and features 4 must have the same number of features");
}
TEST_CASE("Invalid class name", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), "class name not found in states");
}
TEST_CASE("Invalid feature name", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
auto statest = raw.states;
statest.erase("petallength");
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), "feature [petallength] not found in states");
}
TEST_CASE("Invalid hyperparameter", "[Classifier]")
{
auto model = bayesnet::KDB(2);
auto raw = RawDatasets("iris", true);
REQUIRE_THROWS_AS(model.setHyperparameters({ { "alpha", "0.0" } }), std::invalid_argument);
REQUIRE_THROWS_WITH(model.setHyperparameters({ { "alpha", "0.0" } }), "Invalid hyperparameters{\"alpha\":\"0.0\"}");
}
TEST_CASE("Topological order", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto order = model.topological_order();
REQUIRE(order.size() == 4);
REQUIRE(order[0] == "petallength");
REQUIRE(order[1] == "sepallength");
REQUIRE(order[2] == "sepalwidth");
REQUIRE(order[3] == "petalwidth");
}
TEST_CASE("Dump_cpt", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto cpt = model.dump_cpt();
REQUIRE(cpt.size() == 1718);
}
TEST_CASE("Not fitted model", "[Classifier]")
{
auto model = bayesnet::TAN();
auto raw = RawDatasets("iris", true);
auto message = "Classifier has not been fitted";
// tensors
REQUIRE_THROWS_AS(model.predict(raw.Xt), std::logic_error);
REQUIRE_THROWS_WITH(model.predict(raw.Xt), message);
REQUIRE_THROWS_AS(model.predict_proba(raw.Xt), std::logic_error);
REQUIRE_THROWS_WITH(model.predict_proba(raw.Xt), message);
REQUIRE_THROWS_AS(model.score(raw.Xt, raw.yt), std::logic_error);
REQUIRE_THROWS_WITH(model.score(raw.Xt, raw.yt), message);
// vectors
REQUIRE_THROWS_AS(model.predict(raw.Xv), std::logic_error);
REQUIRE_THROWS_WITH(model.predict(raw.Xv), message);
REQUIRE_THROWS_AS(model.predict_proba(raw.Xv), std::logic_error);
REQUIRE_THROWS_WITH(model.predict_proba(raw.Xv), message);
REQUIRE_THROWS_AS(model.score(raw.Xv, raw.yv), std::logic_error);
REQUIRE_THROWS_WITH(model.score(raw.Xv, raw.yv), message);
}
TEST_CASE("KDB Graph", "[Classifier]")
{
auto model = bayesnet::KDB(2);
auto raw = RawDatasets("iris", true);
model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = model.graph();
REQUIRE(graph.size() == 15);
}
TEST_CASE("KDBLd Graph", "[Classifier]")
{
auto model = bayesnet::KDBLd(2);
auto raw = RawDatasets("iris", false);
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = model.graph();
REQUIRE(graph.size() == 15);
}

126
tests/TestBayesEnsemble.cc Normal file
View File

@@ -0,0 +1,126 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/ensembles/BoostAODE.h"
#include "bayesnet/ensembles/AODE.h"
#include "bayesnet/ensembles/AODELd.h"
#include "TestUtils.h"
TEST_CASE("Topological Order", "[Ensemble]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto order = clf.topological_order();
REQUIRE(order.size() == 0);
}
TEST_CASE("Dump CPT", "[Ensemble]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto dump = clf.dump_cpt();
REQUIRE(dump.size() == 39916);
}
TEST_CASE("Number of States", "[Ensemble]")
{
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfStates() == 76);
}
TEST_CASE("Show", "[Ensemble]")
{
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
clf.setHyperparameters({
{"bisection", false},
{"maxTolerance", 1},
{"convergence", false},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::string> expected = {
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> sepallength, sepalwidth, petalwidth, ",
"petalwidth -> ",
"sepallength -> ",
"sepalwidth -> ",
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> ",
"petalwidth -> sepallength, sepalwidth, petallength, ",
"sepallength -> ",
"sepalwidth -> ",
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> ",
"petalwidth -> ",
"sepallength -> sepalwidth, petallength, petalwidth, ",
"sepalwidth -> ",
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> ",
"petalwidth -> ",
"sepallength -> ",
"sepalwidth -> sepallength, petallength, petalwidth, ",
};
auto show = clf.show();
REQUIRE(show.size() == expected.size());
for (size_t i = 0; i < show.size(); i++)
REQUIRE(show[i] == expected[i]);
}
TEST_CASE("Graph", "[Ensemble]")
{
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = clf.graph();
REQUIRE(graph.size() == 56);
auto clf2 = bayesnet::AODE();
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
graph = clf2.graph();
REQUIRE(graph.size() == 56);
raw = RawDatasets("glass", false);
auto clf3 = bayesnet::AODELd();
clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
graph = clf3.graph();
REQUIRE(graph.size() == 261);
}
TEST_CASE("Compute ArgMax", "[Ensemble]")
{
class TestEnsemble : public bayesnet::BoostAODE {
public:
TestEnsemble() : bayesnet::BoostAODE() {}
torch::Tensor compute_arg_max(torch::Tensor& X) { return Ensemble::compute_arg_max(X); }
std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X) { return Ensemble::compute_arg_max(X); }
};
TestEnsemble clf;
std::vector<std::vector<double>> X = {
{0.1f, 0.2f, 0.3f},
{0.4f, 0.9f, 0.6f},
{0.7f, 0.8f, 0.9f},
{0.5f, 0.2f, 0.1f},
{0.3f, 0.7f, 0.2f},
{0.5f, 0.5f, 0.2f}
};
std::vector<int> expected = { 2, 1, 2, 0, 1, 0 };
auto argmax = clf.compute_arg_max(X);
REQUIRE(argmax.size() == expected.size());
REQUIRE(argmax == expected);
auto Xt = torch::zeros({ 6, 3 }, torch::kFloat32);
Xt[0][0] = 0.1f; Xt[0][1] = 0.2f; Xt[0][2] = 0.3f;
Xt[1][0] = 0.4f; Xt[1][1] = 0.9f; Xt[1][2] = 0.6f;
Xt[2][0] = 0.7f; Xt[2][1] = 0.8f; Xt[2][2] = 0.9f;
Xt[3][0] = 0.5f; Xt[3][1] = 0.2f; Xt[3][2] = 0.1f;
Xt[4][0] = 0.3f; Xt[4][1] = 0.7f; Xt[4][2] = 0.2f;
Xt[5][0] = 0.5f; Xt[5][1] = 0.5f; Xt[5][2] = 0.2f;
auto argmaxt = clf.compute_arg_max(Xt);
REQUIRE(argmaxt.size(0) == expected.size());
for (int i = 0; i < argmaxt.size(0); i++)
REQUIRE(argmaxt[i].item<int>() == expected[i]);
}

267
tests/TestBayesMetrics.cc Normal file
View File

@@ -0,0 +1,267 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/utils/BayesMetrics.h"
#include "TestUtils.h"
#include "Timer.h"
TEST_CASE("Metrics Test", "[Metrics]")
{
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
map<std::string, pair<int, std::vector<int>>> resultsKBest = {
{"glass", {7, { 0, 1, 7, 6, 3, 5, 2 }}},
{"iris", {3, { 0, 3, 2 }} },
{"ecoli", {6, { 2, 4, 1, 0, 6, 5 }}},
{"diabetes", {2, { 7, 1 }}}
};
map<std::string, double> resultsMI = {
{"glass", 0.12805398},
{"iris", 0.3158139948},
{"ecoli", 0.0089431099},
{"diabetes", 0.0345470614}
};
map<pair<std::string, int>, std::vector<pair<int, int>>> resultsMST = {
{ {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {3, 4}, {5, 1}, {5, 8}, {6, 2}, {6, 7} } },
{ {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {0, 6}, {0, 3}, {3, 4}, {6, 2}, {6, 7} } },
{ {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } },
{ {"iris", 1}, { {1, 0}, {1, 3}, {0, 2} } },
{ {"ecoli", 0}, { {0, 1}, {0, 2}, {1, 5}, {1, 3}, {5, 6}, {5, 4} } },
{ {"ecoli", 1}, { {1, 0}, {1, 5}, {1, 3}, {5, 6}, {5, 4}, {0, 2} } },
{ {"diabetes", 0}, { {0, 7}, {0, 2}, {0, 6}, {2, 3}, {3, 4}, {3, 5}, {4, 1} } },
{ {"diabetes", 1}, { {1, 4}, {4, 3}, {3, 2}, {3, 5}, {2, 0}, {0, 7}, {0, 6} } }
};
auto raw = RawDatasets(file_name, true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
bayesnet::Metrics metricsv(raw.Xv, raw.yv, raw.features, raw.className, raw.classNumStates);
SECTION("Test Constructor")
{
REQUIRE(metrics.getScoresKBest().size() == 0);
REQUIRE(metricsv.getScoresKBest().size() == 0);
}
SECTION("Test SelectKBestWeighted")
{
std::vector<int> kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
std::vector<int> kBestv = metricsv.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
REQUIRE(kBest.size() == resultsKBest.at(file_name).first);
REQUIRE(kBestv.size() == resultsKBest.at(file_name).first);
REQUIRE(kBest == resultsKBest.at(file_name).second);
REQUIRE(kBestv == resultsKBest.at(file_name).second);
}
SECTION("Test Mutual Information")
{
auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
auto resultv = metricsv.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
REQUIRE(resultv == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
}
SECTION("Test Maximum Spanning Tree")
{
auto weights_matrix = metrics.conditionalEdge(raw.weights);
auto weights_matrixv = metricsv.conditionalEdge(raw.weights);
for (int i = 0; i < 2; ++i) {
auto result = metrics.maximumSpanningTree(raw.features, weights_matrix, i);
auto resultv = metricsv.maximumSpanningTree(raw.features, weights_matrixv, i);
REQUIRE(result == resultsMST.at({ file_name, i }));
REQUIRE(resultv == resultsMST.at({ file_name, i }));
}
}
}
TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0);
REQUIRE(kBest.size() == raw.features.size());
REQUIRE(kBest == std::vector<int>({ 1, 0, 3, 2 }));
}
TEST_CASE("Entropy Test", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights);
REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon));
auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32);
auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32);
result = metrics.entropy(data, weights);
REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon));
data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32);
result = metrics.entropy(data, weights);
REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon));
}
TEST_CASE("Conditional Entropy", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto expected = std::map<std::pair<int, int>, double>{
{ { 0, 1 }, 1.32674 },
{ { 0, 2 }, 0.236253 },
{ { 0, 3 }, 0.1202 },
{ { 1, 2 }, 0.252551 },
{ { 1, 3 }, 0.10515 },
{ { 2, 3 }, 0.108323 },
};
for (int i = 0; i < raw.features.size() - 1; ++i) {
for (int j = i + 1; j < raw.features.size(); ++j) {
double result = metrics.conditionalEntropy(raw.dataset.index({ i, "..." }), raw.dataset.index({ j, "..." }), raw.yt, raw.weights);
REQUIRE(result == Catch::Approx(expected.at({ i, j })).epsilon(raw.epsilon));
}
}
}
TEST_CASE("Conditional Mutual Information", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto expected = std::map<std::pair<int, int>, double>{
{ { 0, 1 }, 0.0 },
{ { 0, 2 }, 0.287696 },
{ { 0, 3 }, 0.403749 },
{ { 1, 2 }, 1.17112 },
{ { 1, 3 }, 1.31852 },
{ { 2, 3 }, 0.210068 },
};
for (int i = 0; i < raw.features.size() - 1; ++i) {
for (int j = i + 1; j < raw.features.size(); ++j) {
double result = metrics.conditionalMutualInformation(raw.dataset.index({ i, "..." }), raw.dataset.index({ j, "..." }), raw.yt, raw.weights);
REQUIRE(result == Catch::Approx(expected.at({ i, j })).epsilon(raw.epsilon));
}
}
}
TEST_CASE("Select K Pairs descending", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
std::vector<int> empty;
auto results = metrics.SelectKPairs(raw.weights, empty, false);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 1, 3 }, 1.31852 },
{ { 1, 2 }, 1.17112 },
{ { 0, 3 }, 0.403749 },
{ { 0, 2 }, 0.287696 },
{ { 2, 3 }, 0.210068 },
{ { 0, 1 }, 0.0 },
};
auto scores = metrics.getScoresKPairs();
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
REQUIRE(results.size() == 6);
REQUIRE(scores.size() == 6);
}
TEST_CASE("Select K Pairs ascending", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
std::vector<int> empty;
auto results = metrics.SelectKPairs(raw.weights, empty, true);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 0, 1 }, 0.0 },
{ { 2, 3 }, 0.210068 },
{ { 0, 2 }, 0.287696 },
{ { 0, 3 }, 0.403749 },
{ { 1, 2 }, 1.17112 },
{ { 1, 3 }, 1.31852 },
};
auto scores = metrics.getScoresKPairs();
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
REQUIRE(results.size() == 6);
REQUIRE(scores.size() == 6);
}
TEST_CASE("Select K Pairs with features excluded", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
std::vector<int> excluded = { 0, 3 };
auto results = metrics.SelectKPairs(raw.weights, excluded, true);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 1, 2 }, 1.17112 },
};
auto scores = metrics.getScoresKPairs();
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
REQUIRE(results.size() == 1);
REQUIRE(scores.size() == 1);
}
TEST_CASE("Select K Pairs with number of pairs descending", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
std::vector<int> empty;
auto results = metrics.SelectKPairs(raw.weights, empty, false, 3);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 1, 3 }, 1.31852 },
{ { 1, 2 }, 1.17112 },
{ { 0, 3 }, 0.403749 }
};
auto scores = metrics.getScoresKPairs();
REQUIRE(results.size() == 3);
REQUIRE(scores.size() == 3);
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
}
TEST_CASE("Select K Pairs with number of pairs ascending", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
std::vector<int> empty;
auto results = metrics.SelectKPairs(raw.weights, empty, true, 3);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 0, 3 }, 0.403749 },
{ { 1, 2 }, 1.17112 },
{ { 1, 3 }, 1.31852 }
};
auto scores = metrics.getScoresKPairs();
REQUIRE(results.size() == 3);
REQUIRE(scores.size() == 3);
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
}

381
tests/TestBayesModels.cc Normal file
View File

@@ -0,0 +1,381 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_approx.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "TestUtils.h"
#include "bayesnet/classifiers/KDB.h"
#include "bayesnet/classifiers/KDBLd.h"
#include "bayesnet/classifiers/SPODE.h"
#include "bayesnet/classifiers/SPODELd.h"
#include "bayesnet/classifiers/TAN.h"
#include "bayesnet/classifiers/TANLd.h"
#include "bayesnet/classifiers/XSPODE.h"
#include "bayesnet/ensembles/AODE.h"
#include "bayesnet/ensembles/AODELd.h"
#include "bayesnet/ensembles/BoostAODE.h"
const std::string ACTUAL_VERSION = "1.1.0";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{
map<pair<std::string, std::string>, float> scores{// Diabetes
{{"diabetes", "AODE"}, 0.82161},
{{"diabetes", "KDB"}, 0.852865},
{{"diabetes", "XSPODE"}, 0.631510437f},
{{"diabetes", "SPODE"}, 0.802083},
{{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.8125f},
{{"diabetes", "KDBLd"}, 0.80208f},
{{"diabetes", "SPODELd"}, 0.7890625f},
{{"diabetes", "TANLd"}, 0.803385437f},
{{"diabetes", "BoostAODE"}, 0.83984f},
// Ecoli
{{"ecoli", "AODE"}, 0.889881},
{{"ecoli", "KDB"}, 0.889881},
{{"ecoli", "XSPODE"}, 0.696428597f},
{{"ecoli", "SPODE"}, 0.880952},
{{"ecoli", "TAN"}, 0.892857},
{{"ecoli", "AODELd"}, 0.875f},
{{"ecoli", "KDBLd"}, 0.880952358f},
{{"ecoli", "SPODELd"}, 0.839285731f},
{{"ecoli", "TANLd"}, 0.848214269f},
{{"ecoli", "BoostAODE"}, 0.89583f},
// Glass
{{"glass", "AODE"}, 0.79439},
{{"glass", "KDB"}, 0.827103},
{{"glass", "XSPODE"}, 0.775701},
{{"glass", "SPODE"}, 0.775701},
{{"glass", "TAN"}, 0.827103},
{{"glass", "AODELd"}, 0.799065411f},
{{"glass", "KDBLd"}, 0.82710278f},
{{"glass", "SPODELd"}, 0.780373812f},
{{"glass", "TANLd"}, 0.869158864f},
{{"glass", "BoostAODE"}, 0.84579f},
// Iris
{{"iris", "AODE"}, 0.973333},
{{"iris", "KDB"}, 0.973333},
{{"iris", "XSPODE"}, 0.853333354f},
{{"iris", "SPODE"}, 0.973333},
{{"iris", "TAN"}, 0.973333},
{{"iris", "AODELd"}, 0.973333},
{{"iris", "KDBLd"}, 0.973333},
{{"iris", "SPODELd"}, 0.96f},
{{"iris", "TANLd"}, 0.97333f},
{{"iris", "BoostAODE"}, 0.98f} };
std::map<std::string, bayesnet::BaseClassifier*> models{ {"AODE", new bayesnet::AODE()},
{"AODELd", new bayesnet::AODELd()},
{"BoostAODE", new bayesnet::BoostAODE()},
{"KDB", new bayesnet::KDB(2)},
{"KDBLd", new bayesnet::KDBLd(2)},
{"XSPODE", new bayesnet::XSpode(1)},
{"SPODE", new bayesnet::SPODE(1)},
{"SPODELd", new bayesnet::SPODELd(1)},
{"TAN", new bayesnet::TAN()},
{"TANLd", new bayesnet::TANLd()} };
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd");
auto clf = models[name];
SECTION("Test " + name + " classifier")
{
for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) {
auto clf = models[name];
auto discretize = name.substr(name.length() - 2) != "Ld";
auto raw = RawDatasets(file_name, discretize);
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf->score(raw.Xt, raw.yt);
// std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " <<
// scores[{file_name, name}] << std::endl;
INFO("Classifier: " << name << " File: " << file_name);
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
}
}
SECTION("Library check version")
{
INFO("Checking version of " << name << " classifier");
REQUIRE(clf->getVersion() == ACTUAL_VERSION);
}
delete clf;
}
TEST_CASE("Models features & Graph", "[Models]")
{
auto graph = std::vector<std::string>(
{ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
"\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
"\"class\" -> \"sepallength\"", "\"class\" -> \"sepalwidth\"", "\"class\" -> \"petallength\"",
"\"class\" -> \"petalwidth\"", "\"petallength\" [shape=circle] \n", "\"petallength\" -> \"sepallength\"",
"\"petalwidth\" [shape=circle] \n", "\"sepallength\" [shape=circle] \n", "\"sepallength\" -> \"sepalwidth\"",
"\"sepalwidth\" [shape=circle] \n", "\"sepalwidth\" -> \"petalwidth\"", "}\n" });
SECTION("Test TAN")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::TAN();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.getNumberOfStates() == 19);
REQUIRE(clf.getClassNumStates() == 3);
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> sepallength, ", "petalwidth -> ",
"sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
}
SECTION("Test TANLd")
{
auto clf = bayesnet::TANLd();
auto raw = RawDatasets("iris", false);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.getNumberOfStates() == 27);
REQUIRE(clf.getClassNumStates() == 3);
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> sepallength, ", "petalwidth -> ",
"sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
}
}
TEST_CASE("Get num features & num edges", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}
TEST_CASE("Model predict_proba", "[Models]")
{
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
auto res_prob_tan = std::vector<std::vector<double>>({ {0.00375671, 0.994457, 0.00178621},
{0.00137462, 0.992734, 0.00589123},
{0.00137462, 0.992734, 0.00589123},
{0.00137462, 0.992734, 0.00589123},
{0.00218225, 0.992877, 0.00494094},
{0.00494209, 0.0978534, 0.897205},
{0.0054192, 0.974275, 0.0203054},
{0.00433012, 0.985054, 0.0106159},
{0.000860806, 0.996922, 0.00221698} });
auto res_prob_spode = std::vector<std::vector<double>>({ {0.00419032, 0.994247, 0.00156265},
{0.00172808, 0.993433, 0.00483862},
{0.00172808, 0.993433, 0.00483862},
{0.00172808, 0.993433, 0.00483862},
{0.00279211, 0.993737, 0.00347077},
{0.0120674, 0.357909, 0.630024},
{0.00386239, 0.913919, 0.0822185},
{0.0244389, 0.966447, 0.00911374},
{0.003135, 0.991799, 0.0050661} });
auto res_prob_baode = std::vector<std::vector<double>>({ {0.0112349, 0.962274, 0.0264907},
{0.00371025, 0.950592, 0.0456973},
{0.00371025, 0.950592, 0.0456973},
{0.00371025, 0.950592, 0.0456973},
{0.00369275, 0.84967, 0.146637},
{0.0252205, 0.113564, 0.861215},
{0.0284828, 0.770524, 0.200993},
{0.0213182, 0.857189, 0.121493},
{0.00868436, 0.949494, 0.0418215} });
auto res_prob_voting = std::vector<std::vector<double>>(
{ {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0} });
std::map<std::string, std::vector<std::vector<double>>> res_prob{ {"TAN", res_prob_tan},
{"SPODE", res_prob_spode},
{"BoostAODEproba", res_prob_baode},
{"BoostAODEvoting", res_prob_voting} };
std::map<std::string, bayesnet::BaseClassifier*> models{ {"TAN", new bayesnet::TAN()},
{"SPODE", new bayesnet::SPODE(0)},
{"BoostAODEproba", new bayesnet::BoostAODE(false)},
{"BoostAODEvoting", new bayesnet::BoostAODE(true)} };
int init_index = 78;
auto raw = RawDatasets("iris", true);
SECTION("Test " + model + " predict_proba")
{
auto clf = models[model];
clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto y_pred_proba = clf->predict_proba(raw.Xv);
auto yt_pred_proba = clf->predict_proba(raw.Xt);
auto y_pred = clf->predict(raw.Xv);
auto yt_pred = clf->predict(raw.Xt);
REQUIRE(y_pred.size() == yt_pred.size(0));
REQUIRE(y_pred.size() == y_pred_proba.size());
REQUIRE(y_pred.size() == yt_pred_proba.size(0));
REQUIRE(y_pred.size() == raw.yv.size());
REQUIRE(y_pred_proba[0].size() == 3);
REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
for (int i = 0; i < 9; ++i) {
auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
REQUIRE(predictedClass == y_pred[i]);
// Check predict is coherent with predict_proba
REQUIRE(yt_pred_proba[i].argmax().item<int>() == y_pred[i]);
for (int j = 0; j < yt_pred_proba.size(1); j++) {
REQUIRE(yt_pred_proba[i][j].item<double>() == Catch::Approx(y_pred_proba[i][j]).epsilon(raw.epsilon));
}
}
// Check predict_proba values for vectors and tensors
for (int i = 0; i < 9; i++) {
REQUIRE(y_pred[i] == yt_pred[i].item<int>());
for (int j = 0; j < 3; j++) {
REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
REQUIRE(res_prob[model][i][j] ==
Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
}
}
delete clf;
}
}
TEST_CASE("AODE voting-proba", "[Models]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::AODE(false);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({
{"predict_voting", true},
});
auto score_voting = clf.score(raw.Xv, raw.yv);
auto pred_voting = clf.predict_proba(raw.Xv);
REQUIRE(score_proba == Catch::Approx(0.79439f).epsilon(raw.epsilon));
REQUIRE(score_voting == Catch::Approx(0.78972f).epsilon(raw.epsilon));
REQUIRE(pred_voting[67][0] == Catch::Approx(0.888889).epsilon(raw.epsilon));
REQUIRE(pred_proba[67][0] == Catch::Approx(0.702184).epsilon(raw.epsilon));
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("SPODELd dataset", "[Models]")
{
auto raw = RawDatasets("iris", false);
auto clf = bayesnet::SPODELd(0);
// raw.dataset.to(torch::kFloat32);
clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xt, raw.yt);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
}
TEST_CASE("KDB with hyperparameters", "[Models]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv);
clf.setHyperparameters({
{"k", 3},
{"theta", 0.7},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto scoret = clf.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
}
TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::SPODELd(0);
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
}
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
{
auto clf = bayesnet::AODE();
auto raw = RawDatasets("iris", true);
std::string message = "Ensemble has not been fitted";
REQUIRE_THROWS_AS(clf.predict(raw.Xv), std::logic_error);
REQUIRE_THROWS_AS(clf.predict_proba(raw.Xv), std::logic_error);
REQUIRE_THROWS_AS(clf.predict(raw.Xt), std::logic_error);
REQUIRE_THROWS_AS(clf.predict_proba(raw.Xt), std::logic_error);
REQUIRE_THROWS_AS(clf.score(raw.Xv, raw.yv), std::logic_error);
REQUIRE_THROWS_AS(clf.score(raw.Xt, raw.yt), std::logic_error);
REQUIRE_THROWS_WITH(clf.predict(raw.Xv), message);
REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xv), message);
REQUIRE_THROWS_WITH(clf.predict(raw.Xt), message);
REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xt), message);
REQUIRE_THROWS_WITH(clf.score(raw.Xv, raw.yv), message);
REQUIRE_THROWS_WITH(clf.score(raw.Xt, raw.yt), message);
}
TEST_CASE("TAN & SPODE with hyperparameters", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::TAN();
clf.setHyperparameters({
{"parent", 1},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.973333).epsilon(raw.epsilon));
auto clf2 = bayesnet::SPODE(0);
clf2.setHyperparameters({
{"parent", 1},
});
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score2 = clf2.score(raw.Xv, raw.yv);
REQUIRE(score2 == Catch::Approx(0.973333).epsilon(raw.epsilon));
}
TEST_CASE("TAN & SPODE with invalid hyperparameters", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::TAN();
clf.setHyperparameters({
{"parent", 5},
});
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
std::invalid_argument);
auto clf2 = bayesnet::SPODE(0);
clf2.setHyperparameters({
{"parent", 5},
});
REQUIRE_THROWS_AS(clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
std::invalid_argument);
}
TEST_CASE("Check proposal checkInput", "[Models]")
{
class testProposal : public bayesnet::Proposal {
public:
testProposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_)
: Proposal(dataset_, features_, className_)
{
}
void test_X_y(const torch::Tensor& X, const torch::Tensor& y) { checkInput(X, y); }
};
auto raw = RawDatasets("iris", true);
auto clf = testProposal(raw.dataset, raw.features, raw.className);
torch::Tensor X = torch::randint(0, 3, { 10, 4 });
torch::Tensor y = torch::rand({ 10 });
INFO("Check X is not float");
REQUIRE_THROWS_AS(clf.test_X_y(X, y), std::invalid_argument);
X = torch::rand({ 10, 4 });
INFO("Check y is not integer");
REQUIRE_THROWS_AS(clf.test_X_y(X, y), std::invalid_argument);
y = torch::randint(0, 3, { 10 });
INFO("X and y are correct");
REQUIRE_NOTHROW(clf.test_X_y(X, y));
}
TEST_CASE("Check KDB loop detection", "[Models]")
{
class testKDB : public bayesnet::KDB {
public:
testKDB() : KDB(2, 0) {}
void test_add_m_edges(std::vector<std::string> features_, int idx, std::vector<int>& S, torch::Tensor& weights)
{
features = features_;
add_m_edges(idx, S, weights);
}
};
auto clf = testKDB();
auto features = std::vector<std::string>{ "A", "B", "C" };
int idx = 0;
std::vector<int> S = { 0 };
torch::Tensor weights = torch::tensor({
{ 1.0, 10.0, 0.0 }, // row0 -> picks col1
{ 0.0, 1.0, 10.0 }, // row1 -> picks col2
{ 10.0, 0.0, 1.0 }, // row2 -> picks col0
});
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights));
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights));
}

598
tests/TestBayesNetwork.cc Normal file
View File

@@ -0,0 +1,598 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <string>
#include "TestUtils.h"
#include "bayesnet/network/Network.h"
#include "bayesnet/network/Node.h"
#include "bayesnet/utils/bayesnetUtils.h"
const double threshold = 1e-4;
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
{
std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} };
for (const auto& feature : features) {
net.addNode(feature);
}
net.addNode(className);
for (const auto& edge : network) {
net.addEdge(features.at(edge.first), features.at(edge.second));
}
for (const auto& feature : features) {
net.addEdge(className, feature);
}
}
TEST_CASE("Test Bayesian Network", "[Network]")
{
auto raw = RawDatasets("iris", true);
auto net = bayesnet::Network();
SECTION("Test get features")
{
net.addNode("A");
net.addNode("B");
REQUIRE(net.getFeatures() == std::vector<std::string>{"A", "B"});
net.addNode("C");
REQUIRE(net.getFeatures() == std::vector<std::string>{"A", "B", "C"});
}
SECTION("Test get edges")
{
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("B", "C");
REQUIRE(net.getEdges() == std::vector<pair<std::string, std::string>>{ {"A", "B"}, { "B", "C" } });
REQUIRE(net.getNumEdges() == 2);
net.addEdge("A", "C");
REQUIRE(net.getEdges() == std::vector<pair<std::string, std::string>>{ {"A", "B"}, { "A", "C" }, { "B", "C" } });
REQUIRE(net.getNumEdges() == 3);
}
SECTION("Test getNodes")
{
net.addNode("A");
net.addNode("B");
auto& nodes = net.getNodes();
REQUIRE(nodes.count("A") == 1);
REQUIRE(nodes.count("B") == 1);
}
SECTION("Test fit Network")
{
auto net2 = bayesnet::Network();
auto net3 = bayesnet::Network();
net3.initialize();
net2.initialize();
net.initialize();
buildModel(net, raw.features, raw.className);
buildModel(net2, raw.features, raw.className);
buildModel(net3, raw.features, raw.className);
std::vector<pair<std::string, std::string>> edges = {
{"class", "sepallength"}, {"class", "sepalwidth"}, {"class", "petallength"},
{"class", "petalwidth" }, {"sepallength", "sepalwidth"}, {"sepallength", "petallength"},
{"sepalwidth", "petalwidth"}
};
REQUIRE(net.getEdges() == edges);
REQUIRE(net2.getEdges() == edges);
REQUIRE(net3.getEdges() == edges);
std::vector<std::string> features = { "sepallength", "sepalwidth", "petallength", "petalwidth", "class" };
REQUIRE(net.getFeatures() == features);
REQUIRE(net2.getFeatures() == features);
REQUIRE(net3.getFeatures() == features);
auto& nodes = net.getNodes();
auto& nodes2 = net2.getNodes();
auto& nodes3 = net3.getNodes();
// Check Nodes parents & children
for (const auto& feature : features) {
// Parents
std::vector<std::string> parents, parents2, parents3, children, children2, children3;
auto nodeParents = nodes[feature]->getParents();
auto nodeParents2 = nodes2[feature]->getParents();
auto nodeParents3 = nodes3[feature]->getParents();
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
transform(nodeParents2.begin(), nodeParents2.end(), back_inserter(parents2), [](const auto& p) { return p->getName(); });
transform(nodeParents3.begin(), nodeParents3.end(), back_inserter(parents3), [](const auto& p) { return p->getName(); });
REQUIRE(parents == parents2);
REQUIRE(parents == parents3);
// Children
auto nodeChildren = nodes[feature]->getChildren();
auto nodeChildren2 = nodes2[feature]->getChildren();
auto nodeChildren3 = nodes2[feature]->getChildren();
transform(nodeChildren.begin(), nodeChildren.end(), back_inserter(children), [](const auto& p) { return p->getName(); });
transform(nodeChildren2.begin(), nodeChildren2.end(), back_inserter(children2), [](const auto& p) { return p->getName(); });
transform(nodeChildren3.begin(), nodeChildren3.end(), back_inserter(children3), [](const auto& p) { return p->getName(); });
REQUIRE(children == children2);
REQUIRE(children == children3);
}
// Fit networks
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(net.getStates() == net2.getStates());
REQUIRE(net.getStates() == net3.getStates());
REQUIRE(net.getFeatures() == net2.getFeatures());
REQUIRE(net.getFeatures() == net3.getFeatures());
REQUIRE(net.getClassName() == net2.getClassName());
REQUIRE(net.getClassName() == net3.getClassName());
REQUIRE(net.getNodes().size() == net2.getNodes().size());
REQUIRE(net.getNodes().size() == net3.getNodes().size());
REQUIRE(net.getEdges() == net2.getEdges());
REQUIRE(net.getEdges() == net3.getEdges());
REQUIRE(net.getNumEdges() == net2.getNumEdges());
REQUIRE(net.getNumEdges() == net3.getNumEdges());
REQUIRE(net.getClassNumStates() == net2.getClassNumStates());
REQUIRE(net.getClassNumStates() == net3.getClassNumStates());
REQUIRE(net.getSamples().size(0) == net2.getSamples().size(0));
REQUIRE(net.getSamples().size(0) == net3.getSamples().size(0));
REQUIRE(net.getSamples().size(1) == net2.getSamples().size(1));
REQUIRE(net.getSamples().size(1) == net3.getSamples().size(1));
// Check Conditional Probabilities tables
for (int i = 0; i < features.size(); ++i) {
auto feature = features.at(i);
for (const auto& feature : features) {
auto cpt = nodes[feature]->getCPT();
auto cpt2 = nodes2[feature]->getCPT();
auto cpt3 = nodes3[feature]->getCPT();
REQUIRE(cpt.equal(cpt2));
REQUIRE(cpt.equal(cpt3));
}
}
}
SECTION("Test show")
{
INFO("Test show");
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("A", "C");
auto str = net.show();
REQUIRE(str.size() == 3);
REQUIRE(str[0] == "A -> B, C, ");
REQUIRE(str[1] == "B -> ");
REQUIRE(str[2] == "C -> ");
}
SECTION("Test topological_sort")
{
INFO("Test topological sort");
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("A", "C");
auto sorted = net.topological_sort();
REQUIRE(sorted.size() == 3);
REQUIRE(sorted[0] == "A");
bool result = sorted[1] == "B" && sorted[2] == "C";
REQUIRE(result);
}
SECTION("Test graph")
{
INFO("Test graph");
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("A", "C");
auto str = net.graph("Test Graph");
REQUIRE(str.size() == 7);
REQUIRE(str[0] == "digraph BayesNet {\nlabel=<BayesNet Test Graph>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
REQUIRE(str[1] == "\"A\" [shape=circle] \n");
REQUIRE(str[2] == "\"A\" -> \"B\"");
REQUIRE(str[3] == "\"A\" -> \"C\"");
REQUIRE(str[4] == "\"B\" [shape=circle] \n");
REQUIRE(str[5] == "\"C\" [shape=circle] \n");
REQUIRE(str[6] == "}\n");
}
SECTION("Test predict")
{
INFO("Test predict");
buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<int> y_test = { 2, 2, 0, 2, 1 };
auto y_pred = net.predict(test);
REQUIRE(y_pred == y_test);
}
SECTION("Test predict_proba")
{
INFO("Test predict_proba");
buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<std::vector<double>> y_test = {
{0.450237, 0.0866621, 0.463101},
{0.244443, 0.0925922, 0.662964},
{0.913441, 0.0125857, 0.0739732},
{0.450237, 0.0866621, 0.463101},
{0.0135226, 0.971726, 0.0147519}
};
auto y_pred = net.predict_proba(test);
REQUIRE(y_pred.size() == 5);
REQUIRE(y_pred[0].size() == 3);
for (int i = 0; i < y_pred.size(); ++i) {
for (int j = 0; j < y_pred[i].size(); ++j) {
REQUIRE(y_pred[i][j] == Catch::Approx(y_test[i][j]).margin(threshold));
}
}
}
SECTION("Test score")
{
INFO("Test score");
buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = net.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
}
SECTION("Copy constructor")
{
INFO("Test copy constructor");
buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
auto net2 = bayesnet::Network(net);
REQUIRE(net.getFeatures() == net2.getFeatures());
REQUIRE(net.getEdges() == net2.getEdges());
REQUIRE(net.getNumEdges() == net2.getNumEdges());
REQUIRE(net.getStates() == net2.getStates());
REQUIRE(net.getClassName() == net2.getClassName());
REQUIRE(net.getClassNumStates() == net2.getClassNumStates());
REQUIRE(net.getSamples().size(0) == net2.getSamples().size(0));
REQUIRE(net.getSamples().size(1) == net2.getSamples().size(1));
REQUIRE(net.getNodes().size() == net2.getNodes().size());
for (const auto& feature : net.getFeatures()) {
auto& node = net.getNodes().at(feature);
auto& node2 = net2.getNodes().at(feature);
REQUIRE(node->getName() == node2->getName());
REQUIRE(node->getChildren().size() == node2->getChildren().size());
REQUIRE(node->getParents().size() == node2->getParents().size());
REQUIRE(node->getCPT().equal(node2->getCPT()));
}
}
SECTION("Network oddities")
{
INFO("Network oddities");
buildModel(net, raw.features, raw.className);
// predict without fitting
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
auto test_tensor = bayesnet::vectorToTensor(test);
REQUIRE_THROWS_AS(net.predict(test), std::logic_error);
REQUIRE_THROWS_WITH(net.predict(test), "You must call fit() before calling predict()");
REQUIRE_THROWS_AS(net.predict(test_tensor), std::logic_error);
REQUIRE_THROWS_WITH(net.predict(test_tensor), "You must call fit() before calling predict()");
REQUIRE_THROWS_AS(net.predict_proba(test), std::logic_error);
REQUIRE_THROWS_WITH(net.predict_proba(test), "You must call fit() before calling predict_proba()");
REQUIRE_THROWS_AS(net.score(raw.Xv, raw.yv), std::logic_error);
REQUIRE_THROWS_WITH(net.score(raw.Xv, raw.yv), "You must call fit() before calling predict()");
// predict with wrong data
auto netx = bayesnet::Network();
buildModel(netx, raw.features, raw.className);
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} };
auto test_tensor2 = bayesnet::vectorToTensor(test2, false);
REQUIRE_THROWS_AS(netx.predict(test2), std::invalid_argument);
REQUIRE_THROWS_WITH(netx.predict(test2), "(V) Sample size (3) does not match the number of features (4)");
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::invalid_argument);
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "(T) Sample size (3) does not match the number of features (4)");
// fit with wrong data
// Weights
auto net2 = bayesnet::Network();
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit";
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), invalid_weights);
// X & y
std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)";
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), invalid_labels);
// Features
std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)";
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), invalid_features);
// Different number of features
auto net3 = bayesnet::Network();
auto test2y = { 1, 2, 3, 4, 5 };
buildModel(net3, raw.features, raw.className);
auto features3 = raw.features;
features3.pop_back();
std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)";
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), invalid_features2);
// Uninitialized network
std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()";
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), network_invalid);
// Classname
std::string invalid_classname = "Class Name not found in Network::features";
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), invalid_classname);
// Invalid feature
auto features2 = raw.features;
features2.pop_back();
features2.push_back("duck");
std::string invalid_feature = "Feature duck not found in Network::features";
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), invalid_feature);
// Add twice the same node name to the network => Nothing should happen
net.addNode("A");
net.addNode("A");
// invalid state in checkfit
auto net4 = bayesnet::Network();
buildModel(net4, raw.features, raw.className);
std::string invalid_state = "Feature sepallength not found in states";
REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), std::invalid_argument);
REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), invalid_state);
// Try to add node or edge to a fitted network
auto net5 = bayesnet::Network();
buildModel(net5, raw.features, raw.className);
net5.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE_THROWS_AS(net5.addNode("A"), std::logic_error);
REQUIRE_THROWS_WITH(net5.addNode("A"), "Cannot add node to a fitted network. Initialize first.");
REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error);
REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first.");
}
}
TEST_CASE("Test and empty Node", "[Network]")
{
auto net = bayesnet::Network();
REQUIRE_THROWS_AS(net.addNode(""), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addNode(""), "Node name cannot be empty");
}
TEST_CASE("Cicle in Network", "[Network]")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("B", "C");
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph.");
}
TEST_CASE("Edges troubles", "[Network]")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
REQUIRE_THROWS_AS(net.addEdge("A", "C"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("A", "C"), "Child node C does not exist");
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Parent node C does not exist");
net.addEdge("A", "B");
REQUIRE_THROWS_AS(net.addEdge("A", "B"), std::invalid_argument);
REQUIRE_THROWS_WITH(net.addEdge("A", "B"), "Edge A -> B already exists");
}
TEST_CASE("Dump CPT", "[Network]")
{
auto net = bayesnet::Network();
auto raw = RawDatasets("iris", true);
buildModel(net, raw.features, raw.className);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
auto res = net.dump_cpt();
std::string expected = R"(* class: (3) : [3]
0.3333
0.3333
0.3333
[ CPUDoubleType{3} ]
* petallength: (4) : [4, 3, 3]
(1,.,.) =
0.9388 0.1000 0.2000
0.6250 0.0526 0.1667
0.4000 0.0303 0.0196
(2,.,.) =
0.0204 0.7000 0.4000
0.1250 0.8421 0.1667
0.2000 0.7273 0.0196
(3,.,.) =
0.0204 0.1000 0.2000
0.1250 0.0526 0.5000
0.2000 0.1818 0.1373
(4,.,.) =
0.0204 0.1000 0.2000
0.1250 0.0526 0.1667
0.2000 0.0606 0.8235
[ CPUDoubleType{4,3,3} ]
* petalwidth: (3) : [3, 6, 3]
(1,.,.) =
0.5000 0.0417 0.0714
0.3333 0.1111 0.0909
0.5000 0.1000 0.2000
0.7778 0.0909 0.0667
0.8667 0.1000 0.0667
0.9394 0.2500 0.1250
(2,.,.) =
0.2500 0.9167 0.2857
0.3333 0.7778 0.1818
0.2500 0.8000 0.2000
0.1111 0.8182 0.1333
0.0667 0.7000 0.0667
0.0303 0.5000 0.1250
(3,.,.) =
0.2500 0.0417 0.6429
0.3333 0.1111 0.7273
0.2500 0.1000 0.6000
0.1111 0.0909 0.8000
0.0667 0.2000 0.8667
0.0303 0.2500 0.7500
[ CPUDoubleType{3,6,3} ]
* sepallength: (3) : [3, 3]
0.8679 0.1321 0.0377
0.0943 0.3019 0.0566
0.0377 0.5660 0.9057
[ CPUDoubleType{3,3} ]
* sepalwidth: (6) : [6, 3, 3]
(1,.,.) =
0.0392 0.5000 0.2857
0.1000 0.4286 0.2500
0.1429 0.2571 0.1887
(2,.,.) =
0.0196 0.0833 0.1429
0.1000 0.1429 0.2500
0.1429 0.1429 0.1509
(3,.,.) =
0.0392 0.0833 0.1429
0.1000 0.1429 0.1250
0.1429 0.1714 0.0566
(4,.,.) =
0.1373 0.1667 0.1429
0.1000 0.1905 0.1250
0.1429 0.1429 0.2453
(5,.,.) =
0.2549 0.0833 0.1429
0.1000 0.0476 0.1250
0.1429 0.2286 0.2453
(6,.,.) =
0.5098 0.0833 0.1429
0.5000 0.0476 0.1250
0.2857 0.0571 0.1132
[ CPUDoubleType{6,3,3} ]
)";
REQUIRE(res == expected);
}
TEST_CASE("Test Smoothing A", "[Network]")
{
/*
Tomando m = 1 Pa = 0.5
Si estoy calculando P(A | C), con C en{ 0,1,2 } y tengo :
AC = { 11, 12, 11, 10, 10, 12, 10, 01, 00, 02 }
Entonces:
P(A = 1 | C = 0) = (3 + 1 / 2 * 1) / (4 + 1) = 3.5 / 5
P(A = 0 | C = 0) = (1 + 1 / 2 * 1) / (4 + 1) = 1.5 / 5
Donde m aquí es el número de veces de C = 0 que es la que condiciona y la a priori vuelve a ser sobre A que es sobre las que estaríamos calculando esas marginales.
P(A = 1 | C = 1) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 4
P(A = 0 | C = 1) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 4
P(A = 1 | C = 2) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 5
P(A = 0 | C = 2) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 5
En realidad es parecido a Laplace, que en este caso p.e.con C = 0 sería
P(A = 1 | C = 0) = (3 + 1) / (4 + 2) = 4 / 6
P(A = 0 | C = 0) = (1 + 1) / (4 + 2) = 2 / 6
*/
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("C");
net.addEdge("C", "A");
std::vector<int> C = { 1, 2, 1, 0, 0, 2, 0, 1, 0, 2 };
std::vector<std::vector<int>> A = { { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } };
std::map<std::string, std::vector<int>> states = { { "A", {0, 1} }, { "C", {0, 1, 2} } };
auto weights = std::vector<double>(C.size(), 1);
//
// Laplace
//
net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
auto cpt_c_laplace = net.getNodes().at("C")->getCPT();
REQUIRE(cpt_c_laplace.size(0) == 3);
auto laplace_c = std::vector<float>({ 0.3846, 0.3077, 0.3077 });
for (int i = 0; i < laplace_c.size(); ++i) {
REQUIRE(cpt_c_laplace.index({ i }).item<float>() == Catch::Approx(laplace_c[i]).margin(threshold));
}
auto cpt_a_laplace = net.getNodes().at("A")->getCPT();
REQUIRE(cpt_a_laplace.size(0) == 2);
REQUIRE(cpt_a_laplace.size(1) == 3);
auto laplace_a = std::vector<std::vector<float>>({ {0.3333, 0.4000,0.4000}, {0.6667, 0.6000, 0.6000} });
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
REQUIRE(cpt_a_laplace.index({ i, j }).item<float>() == Catch::Approx(laplace_a[i][j]).margin(threshold));
}
}
//
// Cestnik
//
net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
auto cpt_c_cestnik = net.getNodes().at("C")->getCPT();
REQUIRE(cpt_c_cestnik.size(0) == 3);
auto cestnik_c = std::vector<float>({ 0.3939, 0.3030, 0.3030 });
for (int i = 0; i < laplace_c.size(); ++i) {
REQUIRE(cpt_c_cestnik.index({ i }).item<float>() == Catch::Approx(cestnik_c[i]).margin(threshold));
}
auto cpt_a_cestnik = net.getNodes().at("A")->getCPT();
REQUIRE(cpt_a_cestnik.size(0) == 2);
REQUIRE(cpt_a_cestnik.size(1) == 3);
auto cestnik_a = std::vector<std::vector<float>>({ {0.3000, 0.3750, 0.3750}, {0.7000, 0.6250, 0.6250} });
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
REQUIRE(cpt_a_cestnik.index({ i, j }).item<float>() == Catch::Approx(cestnik_a[i][j]).margin(threshold));
}
}
}
TEST_CASE("Test Smoothing B", "[Network]")
{
auto net = bayesnet::Network();
net.addNode("X");
net.addNode("Y");
net.addNode("Z");
net.addNode("C");
net.addEdge("C", "X");
net.addEdge("C", "Y");
net.addEdge("C", "Z");
net.addEdge("Y", "Z");
std::vector<int> C = { 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1 };
std::vector<std::vector<int>> Data = {
{ 0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0},
{ 1,2,0,2,2,2,1,0,0,1,1,1,0,1,2,1,0,2},
{ 2,1,3,3,2,0,0,1,3,2,1,2,2,3,0,0,1,2}
};
std::map<std::string, std::vector<int>> states = {
{ "X", {0, 1} },
{ "Y", {0, 1, 2} },
{ "Z", {0, 1, 2, 3} },
{ "C", {0, 1} }
};
auto weights = std::vector<double>(C.size(), 1);
// See https://www.overleaf.com/read/tfnhpfysfkfx#2d576c example for calculations
INFO("Test Smoothing B - Laplace");
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
auto laplace_values = std::vector<std::vector<float>>({ {0.377418, 0.622582}, {0.217821, 0.782179} });
auto laplace_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
for (auto i = 0; i < 2; ++i) {
for (auto j = 0; j < 2; ++j) {
REQUIRE(laplace_score.at(i).at(j) == Catch::Approx(laplace_values.at(i).at(j)).margin(threshold));
}
}
INFO("Test Smoothing B - Original");
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::ORIGINAL);
auto original_values = std::vector<std::vector<float>>({ {0.344769, 0.655231}, {0.0421263, 0.957874} });
auto original_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
for (auto i = 0; i < 2; ++i) {
for (auto j = 0; j < 2; ++j) {
REQUIRE(original_score.at(i).at(j) == Catch::Approx(original_values.at(i).at(j)).margin(threshold));
}
}
INFO("Test Smoothing B - Cestnik");
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
auto cestnik_values = std::vector<std::vector<float>>({ {0.353422, 0.646578}, {0.12364, 0.87636} });
auto cestnik_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
for (auto i = 0; i < 2; ++i) {
for (auto j = 0; j < 2; ++j) {
REQUIRE(cestnik_score.at(i).at(j) == Catch::Approx(cestnik_values.at(i).at(j)).margin(threshold));
}
}
INFO("Test Smoothing B - No smoothing");
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::NONE);
auto nosmooth_values = std::vector<std::vector<float>>({ {0.342465753, 0.65753424}, {0.0, 1.0} });
auto nosmooth_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
for (auto i = 0; i < 2; ++i) {
for (auto j = 0; j < 2; ++j) {
REQUIRE(nosmooth_score.at(i).at(j) == Catch::Approx(nosmooth_values.at(i).at(j)).margin(threshold));
}
}
}

161
tests/TestBayesNode.cc Normal file
View File

@@ -0,0 +1,161 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <string>
#include <vector>
#include "TestUtils.h"
#include "bayesnet/network/Network.h"
TEST_CASE("Test Node children and parents", "[Node]")
{
auto node = bayesnet::Node("Node");
REQUIRE(node.getName() == "Node");
auto parent_1 = bayesnet::Node("P1");
auto parent_2 = bayesnet::Node("P2");
auto child_1 = bayesnet::Node("H1");
auto child_2 = bayesnet::Node("H2");
auto child_3 = bayesnet::Node("H3");
node.addParent(&parent_1);
node.addParent(&parent_2);
node.addChild(&child_1);
node.addChild(&child_2);
node.addChild(&child_3);
auto parents = node.getParents();
auto children = node.getChildren();
REQUIRE(parents.size() == 2);
REQUIRE(children.size() == 3);
REQUIRE(parents[0]->getName() == "P1");
REQUIRE(parents[1]->getName() == "P2");
REQUIRE(children[0]->getName() == "H1");
REQUIRE(children[1]->getName() == "H2");
REQUIRE(children[2]->getName() == "H3");
node.removeParent(&parent_1);
node.removeChild(&child_1);
parents = node.getParents();
children = node.getChildren();
REQUIRE(parents.size() == 1);
REQUIRE(children.size() == 2);
node.clear();
parents = node.getParents();
children = node.getChildren();
REQUIRE(parents.size() == 0);
REQUIRE(children.size() == 0);
}
TEST_CASE("Test Node computeCPT", "[Node]")
{
// Generate a test to test the computeCPT method of the Node class
// Create a dataset with 3 features and 4 samples
// The dataset is a 2D tensor with 4 rows and 4 columns
auto dataset = torch::tensor({ {1, 0, 0, 1}, {1, 1, 2, 0}, {0, 1, 2, 1}, {0, 1, 0, 1} });
auto states = std::vector<int>({ 2, 3, 3 });
// Create a vector with the names of the features
auto features = std::vector<std::string>{ "F1", "F2", "F3" };
// Create a vector with the names of the classes
auto className = std::string("Class");
// weights
auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble);
std::vector<bayesnet::Node> nodes;
for (int i = 0; i < features.size(); i++) {
auto node = bayesnet::Node(features[i]);
node.setNumStates(states[i]);
nodes.push_back(node);
}
// Create node class with 2 states
nodes.push_back(bayesnet::Node(className));
nodes[features.size()].setNumStates(2);
// The network is c->f1, f2, f3 y f1->f2, f3
for (int i = 0; i < features.size(); i++) {
// Add class node as parent of all feature nodes
nodes[i].addParent(&nodes[features.size()]);
// Node[0] -> Node[1], Node[2]
if (i > 0)
nodes[i].addParent(&nodes[0]);
}
features.push_back(className);
// Compute the conditional probability table
nodes[1].computeCPT(dataset, features, 0.0, weights);
// Get the conditional probability table
auto cpTable = nodes[1].getCPT();
// Get the dimensions of the conditional probability table
auto dimensions = cpTable.sizes();
// Check the dimensions of the conditional probability table
REQUIRE(dimensions.size() == 3);
REQUIRE(dimensions[0] == 3);
REQUIRE(dimensions[1] == 2);
REQUIRE(dimensions[2] == 2);
// Check the values of the conditional probability table
REQUIRE(cpTable[0][0][0].item<float>() == Catch::Approx(0));
REQUIRE(cpTable[0][0][1].item<float>() == Catch::Approx(0));
REQUIRE(cpTable[0][1][0].item<float>() == Catch::Approx(0));
REQUIRE(cpTable[0][1][1].item<float>() == Catch::Approx(1));
REQUIRE(cpTable[1][0][0].item<float>() == Catch::Approx(0));
REQUIRE(cpTable[1][0][1].item<float>() == Catch::Approx(1));
REQUIRE(cpTable[1][1][0].item<float>() == Catch::Approx(1));
REQUIRE(cpTable[1][1][1].item<float>() == Catch::Approx(0));
// Compute evidence
for (auto& node : nodes) {
node.computeCPT(dataset, features, 0.0, weights);
}
auto evidence = std::map<std::string, int>{ { "F1", 0 }, { "F2", 1 }, { "F3", 1 } };
REQUIRE(nodes[3].getFactorValue(evidence) == 0.5);
// Oddities
auto features_back = features;
// Remove a parent from features
// features.pop_back();
// REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
// REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature parent Class not found in dataset");
// Remove a feature from features
// features = features_back;
// features.erase(features.begin());
// REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
// REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature F1 not found in dataset");
}
TEST_CASE("TEST MinFill method", "[Node]")
{
// Generate a test to test the minFill method of the Node class
// Create a graph with 5 nodes
// The graph is a chain with some additional edges
// 0 -> 1,2,3
// 1 -> 2,4
// 2 -> 3
// 3 -> 4
auto node_0 = bayesnet::Node("0");
auto node_1 = bayesnet::Node("1");
auto node_2 = bayesnet::Node("2");
auto node_3 = bayesnet::Node("3");
auto node_4 = bayesnet::Node("4");
// node 0
node_0.addChild(&node_1);
node_0.addChild(&node_2);
node_0.addChild(&node_3);
// node 1
node_1.addChild(&node_2);
node_1.addChild(&node_4);
node_1.addParent(&node_0);
// node 2
node_2.addChild(&node_3);
node_2.addChild(&node_4);
node_2.addParent(&node_0);
node_2.addParent(&node_1);
// node 3
node_3.addChild(&node_4);
node_3.addParent(&node_0);
node_3.addParent(&node_2);
// node 4
node_4.addParent(&node_1);
node_4.addParent(&node_3);
REQUIRE(node_0.minFill() == 3);
REQUIRE(node_1.minFill() == 3);
REQUIRE(node_2.minFill() == 6);
REQUIRE(node_3.minFill() == 3);
REQUIRE(node_4.minFill() == 1);
}

218
tests/TestBoostA2DE.cc Normal file
View File

@@ -0,0 +1,218 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/ensembles/BoostA2DE.h"
#include "TestUtils.h"
TEST_CASE("Build basic model", "[BoostA2DE]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostA2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 342);
REQUIRE(clf.getNumberOfEdges() == 684);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 20");
REQUIRE(clf.getNotes()[2] == "Number of models: 38");
auto score = clf.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.919271).epsilon(raw.epsilon));
}
TEST_CASE("Feature_select IWSS", "[BoostA2DE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostA2DE();
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 140);
REQUIRE(clf.getNumberOfEdges() == 294);
REQUIRE(clf.getNotes().size() == 4);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
REQUIRE(clf.getNotes()[3] == "Number of models: 14");
}
TEST_CASE("Feature_select FCBF", "[BoostA2DE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostA2DE();
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 110);
REQUIRE(clf.getNumberOfEdges() == 231);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
REQUIRE(clf.getNotes()[3] == "Number of models: 11");
}
TEST_CASE("Test used features in train note and score", "[BoostA2DE]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostA2DE(true);
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features","CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 144);
REQUIRE(clf.getNumberOfEdges() == 288);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 16");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.856771).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.856771).epsilon(raw.epsilon));
}
TEST_CASE("Voting vs proba", "[BoostA2DE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostA2DE(false);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({
{"predict_voting",true},
});
auto score_voting = clf.score(raw.Xv, raw.yv);
auto pred_voting = clf.predict_proba(raw.Xv);
REQUIRE(score_proba == Catch::Approx(0.98).epsilon(raw.epsilon));
REQUIRE(score_voting == Catch::Approx(0.946667).epsilon(raw.epsilon));
REQUIRE(pred_voting[83][2] == Catch::Approx(0.53508).epsilon(raw.epsilon));
REQUIRE(pred_proba[83][2] == Catch::Approx(0.48394).epsilon(raw.epsilon));
REQUIRE(clf.dump_cpt().size() == 7742);
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("Order asc, desc & random", "[BoostA2DE]")
{
auto raw = RawDatasets("glass", true);
std::map<std::string, double> scores{
{"asc", 0.752336f }, { "desc", 0.813084f }, { "rand", 0.850467 }
};
for (const std::string& order : { "asc", "desc", "rand" }) {
auto clf = bayesnet::BoostA2DE();
clf.setHyperparameters({
{"order", order},
{"bisection", false},
{"maxTolerance", 1},
{"convergence", false},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("BoostA2DE order: " + order);
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
}
}
TEST_CASE("Oddities2", "[BoostA2DE]")
{
auto clf = bayesnet::BoostA2DE();
auto raw = RawDatasets("iris", true);
auto bad_hyper = nlohmann::json{
{ { "order", "duck" } },
{ { "select_features", "duck" } },
{ { "maxTolerance", 0 } },
{ { "maxTolerance", 7 } },
};
for (const auto& hyper : bad_hyper.items()) {
INFO("BoostA2DE hyper: " + hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
auto bad_hyper_fit = nlohmann::json{
{ { "select_features","IWSS" }, { "threshold", -0.01 } },
{ { "select_features","IWSS" }, { "threshold", 0.51 } },
{ { "select_features","FCBF" }, { "threshold", 1e-8 } },
{ { "select_features","FCBF" }, { "threshold", 1.01 } },
};
for (const auto& hyper : bad_hyper_fit.items()) {
INFO("BoostA2DE hyper: " + hyper.value().dump());
clf.setHyperparameters(hyper.value());
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
}
}
TEST_CASE("No features selected", "[BoostA2DE]")
{
// Check that the note "No features selected in initialization" is added
//
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostA2DE();
clf.setHyperparameters({ {"select_features","FCBF"}, {"threshold", 1 } });
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNotes().size() == 1);
REQUIRE(clf.getNotes()[0] == "No features selected in initialization");
}
TEST_CASE("Bisection Best", "[BoostA2DE]")
{
auto clf = bayesnet::BoostA2DE();
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
clf.setHyperparameters({
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"block_update", false},
{"convergence_best", true},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 480);
REQUIRE(clf.getNumberOfEdges() == 1152);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes().at(0) == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes().at(1) == "Pairs not used in train: 83");
REQUIRE(clf.getNotes().at(2) == "Number of models: 32");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(0.966667f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.966667f).epsilon(raw.epsilon));
}
TEST_CASE("Block Update", "[BoostA2DE]")
{
auto clf = bayesnet::BoostA2DE();
auto raw = RawDatasets("spambase", true, 500);
clf.setHyperparameters({
{"bisection", true},
{"block_update", true},
{"maxTolerance", 3},
{"convergence", true},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 58);
REQUIRE(clf.getNumberOfEdges() == 165);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 1588");
REQUIRE(clf.getNotes()[2] == "Number of models: 1");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
//
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
// for (auto note : clf.getNotes()) {
// std::cout << note << std::endl;
// }
// std::cout << "Score " << score << std::endl;
}
TEST_CASE("Test graph b2a2de", "[BoostA2DE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostA2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto graph = clf.graph();
REQUIRE(graph.size() == 26);
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet BoostA2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
}

222
tests/TestBoostAODE.cc Normal file
View File

@@ -0,0 +1,222 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_approx.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "TestUtils.h"
#include "bayesnet/ensembles/BoostAODE.h"
TEST_CASE("Feature_select CFS", "[BoostAODE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({{"select_features", "CFS"}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Feature_select IWSS", "[BoostAODE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Feature_select FCBF", "[BoostAODE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Test used features in train note and score", "[BoostAODE]") {
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE(true);
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features", "CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon));
}
TEST_CASE("Voting vs proba", "[BoostAODE]") {
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostAODE(false);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({
{"predict_voting", true},
});
auto score_voting = clf.score(raw.Xv, raw.yv);
auto pred_voting = clf.predict_proba(raw.Xv);
REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon));
REQUIRE(clf.dump_cpt().size() == 7004);
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("Order asc, desc & random", "[BoostAODE]") {
auto raw = RawDatasets("glass", true);
std::map<std::string, double> scores{{"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112}};
for (const std::string &order : {"asc", "desc", "rand"}) {
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({
{"order", order},
{"bisection", false},
{"maxTolerance", 1},
{"convergence", false},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("BoostAODE order: " << order);
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
}
}
TEST_CASE("Oddities", "[BoostAODE]") {
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
auto bad_hyper = nlohmann::json{
{{"order", "duck"}},
{{"select_features", "duck"}},
{{"maxTolerance", 0}},
{{"maxTolerance", 7}},
};
for (const auto &hyper : bad_hyper.items()) {
INFO("BoostAODE hyper: " << hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument);
auto bad_hyper_fit = nlohmann::json{
{{"select_features", "IWSS"}, {"threshold", -0.01}},
{{"select_features", "IWSS"}, {"threshold", 0.51}},
{{"select_features", "FCBF"}, {"threshold", 1e-8}},
{{"select_features", "FCBF"}, {"threshold", 1.01}},
};
for (const auto &hyper : bad_hyper_fit.items()) {
INFO("BoostAODE hyper: " << hyper.value().dump());
clf.setHyperparameters(hyper.value());
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
std::invalid_argument);
}
auto bad_hyper_fit2 = nlohmann::json{
{{"alpha_block", true}, {"block_update", true}},
{{"bisection", false}, {"block_update", true}},
};
for (const auto &hyper : bad_hyper_fit2.items()) {
INFO("BoostAODE hyper: " << hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
}
TEST_CASE("Bisection Best", "[BoostAODE]") {
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
clf.setHyperparameters({
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"block_update", false},
{"convergence_best", false},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 210);
REQUIRE(clf.getNumberOfEdges() == 378);
REQUIRE(clf.getNotes().size() == 1);
REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
}
TEST_CASE("Bisection Best vs Last", "[BoostAODE]") {
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
auto clf = bayesnet::BoostAODE(true);
auto hyperparameters = nlohmann::json{
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"convergence_best", true},
};
clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_best = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
// Now we will set the hyperparameter to use the last accuracy
hyperparameters["convergence_best"] = false;
clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_last = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
}
TEST_CASE("Block Update", "[BoostAODE]") {
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("mfeat-factors", true, 500);
clf.setHyperparameters({
{"bisection", true},
{"block_update", true},
{"maxTolerance", 3},
{"convergence", true},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 868);
REQUIRE(clf.getNumberOfEdges() == 1724);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[1] == "Used features in train: 19 of 216");
REQUIRE(clf.getNotes()[2] == "Number of models: 4");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(0.99f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.99f).epsilon(raw.epsilon));
//
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
// for (auto note : clf.getNotes()) {
// std::cout << note << std::endl;
// }
// std::cout << "Score " << score << std::endl;
}
TEST_CASE("Alphablock", "[BoostAODE]") {
auto clf_alpha = bayesnet::BoostAODE();
auto clf_no_alpha = bayesnet::BoostAODE();
auto raw = RawDatasets("diabetes", true);
clf_alpha.setHyperparameters({
{"alpha_block", true},
});
clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);
auto score_no_alpha = clf_no_alpha.score(raw.X_test, raw.y_test);
REQUIRE(score_alpha == Catch::Approx(0.720779f).epsilon(raw.epsilon));
REQUIRE(score_no_alpha == Catch::Approx(0.733766f).epsilon(raw.epsilon));
}

View File

@@ -0,0 +1,115 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "TestUtils.h"
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold, int max_features = 0)
{
max_features = max_features == 0 ? raw.features.size() : max_features;
if (selector == "CFS") {
return new bayesnet::CFS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights);
} else if (selector == "FCBF") {
return new bayesnet::FCBF(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
} else if (selector == "IWSS") {
return new bayesnet::IWSS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
}
return nullptr;
}
TEST_CASE("Features Selected", "[FeatureSelection]")
{
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto raw = RawDatasets(file_name, true);
SECTION("Test features selected, scores and sizes")
{
map<pair<std::string, std::string>, pair<std::vector<int>, std::vector<double>>> results = {
{ {"glass", "CFS"}, { { 2, 3, 6, 1, 8, 4 }, {0.365513, 0.42895, 0.369809, 0.298294, 0.240952, 0.200915} } },
{ {"iris", "CFS"}, { { 3, 2, 1, 0 }, {0.870521, 0.890375, 0.588155, 0.41843} } },
{ {"ecoli", "CFS"}, { { 5, 0, 4, 2, 1, 6 }, {0.512319, 0.565381, 0.486025, 0.41087, 0.331423, 0.266251} } },
{ {"diabetes", "CFS"}, { { 1, 5, 7, 6, 4, 2 }, {0.132858, 0.151209, 0.14244, 0.126591, 0.106028, 0.0825904} } },
{ {"glass", "IWSS" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.42895, 0.359907, 0.273784, 0.223346} } },
{ {"iris", "IWSS"}, { { 3, 2, 0 }, {0.870521, 0.890375, 0.585426} }},
{ {"ecoli", "IWSS"}, { { 5, 6, 0, 1, 4 }, {0.512319, 0.550978, 0.475025, 0.382607, 0.308203} } },
{ {"diabetes", "IWSS"}, { { 1, 5, 4, 7, 3 }, {0.132858, 0.151209, 0.136576, 0.122097, 0.0802232} } },
{ {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
{ {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
{ {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
{ {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }}
};
double threshold;
std::string selector;
std::vector<std::pair<std::string, double>> selectors = {
{ "CFS", 0.0 },
{ "IWSS", 0.5 },
{ "FCBF", 1e-7 }
};
for (const auto item : selectors) {
selector = item.first; threshold = item.second;
bayesnet::FeatureSelect* featureSelector = build_selector(raw, selector, threshold);
featureSelector->fit();
INFO("file_name: " << file_name << ", selector: " << selector);
// Features
auto expected_features = results.at({ file_name, selector }).first;
std::vector<int> selected_features = featureSelector->getFeatures();
REQUIRE(selected_features.size() == expected_features.size());
REQUIRE(selected_features == expected_features);
// Scores
auto expected_scores = results.at({ file_name, selector }).second;
std::vector<double> selected_scores = featureSelector->getScores();
REQUIRE(selected_scores.size() == selected_features.size());
for (int i = 0; i < selected_scores.size(); i++) {
REQUIRE(selected_scores[i] == Catch::Approx(expected_scores[i]).epsilon(raw.epsilon));
}
delete featureSelector;
}
}
}
TEST_CASE("Oddities", "[FeatureSelection]")
{
auto raw = RawDatasets("iris", true);
// FCBF Limits
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
// Not fitted error
auto selector = build_selector(raw, "CFS", 0);
const std::string message = "FeatureSelect not fitted";
REQUIRE_THROWS_AS(selector->getFeatures(), std::runtime_error);
REQUIRE_THROWS_AS(selector->getScores(), std::runtime_error);
REQUIRE_THROWS_WITH(selector->getFeatures(), message);
REQUIRE_THROWS_WITH(selector->getScores(), message);
delete selector;
}
TEST_CASE("Test threshold limits", "[FeatureSelection]")
{
auto raw = RawDatasets("diabetes", true);
// FCBF Limits
auto selector = build_selector(raw, "FCBF", 0.051);
selector->fit();
REQUIRE(selector->getFeatures().size() == 2);
delete selector;
selector = build_selector(raw, "FCBF", 1e-7, 3);
selector->fit();
REQUIRE(selector->getFeatures().size() == 3);
delete selector;
selector = build_selector(raw, "IWSS", 0.5, 5);
selector->fit();
REQUIRE(selector->getFeatures().size() == 5);
delete selector;
}

72
tests/TestMST.cc Normal file
View File

@@ -0,0 +1,72 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <string>
#include <vector>
#include "TestUtils.h"
#include "bayesnet/utils/Mst.h"
TEST_CASE("MST::insertElement tests", "[MST]")
{
bayesnet::MST mst({}, torch::tensor({}), 0);
SECTION("Insert into an empty list")
{
std::list<int> variables;
mst.insertElement(variables, 5);
REQUIRE(variables == std::list<int>{5});
}
SECTION("Insert a non-duplicate element")
{
std::list<int> variables = { 1, 2, 3 };
mst.insertElement(variables, 4);
REQUIRE(variables == std::list<int>{4, 1, 2, 3});
}
SECTION("Insert a duplicate element")
{
std::list<int> variables = { 1, 2, 3 };
mst.insertElement(variables, 2);
REQUIRE(variables == std::list<int>{1, 2, 3});
}
}
TEST_CASE("MST::reorder tests", "[MST]")
{
bayesnet::MST mst({}, torch::tensor({}), 0);
SECTION("Reorder simple graph")
{
std::vector<std::pair<float, std::pair<int, int>>> T = { {2.0, {1, 2}}, {1.0, {0, 1}} };
auto result = mst.reorder(T, 0);
REQUIRE(result == std::vector<std::pair<int, int>>{{0, 1}, { 1, 2 }});
}
SECTION("Reorder with disconnected graph")
{
std::vector<std::pair<float, std::pair<int, int>>> T = { {2.0, {2, 3}}, {1.0, {0, 1}} };
auto result = mst.reorder(T, 0);
REQUIRE(result == std::vector<std::pair<int, int>>{{0, 1}, { 2, 3 }});
}
}
TEST_CASE("MST::maximumSpanningTree tests", "[MST]")
{
std::vector<std::string> features = { "A", "B", "C" };
auto weights = torch::tensor({
{0.0, 1.0, 2.0},
{1.0, 0.0, 3.0},
{2.0, 3.0, 0.0}
});
bayesnet::MST mst(features, weights, 0);
SECTION("MST of a complete graph")
{
auto result = mst.maximumSpanningTree();
REQUIRE(result.size() == 2); // Un MST para 3 nodos tiene 2 aristas
}
}

View File

@@ -0,0 +1,43 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <string>
#include <fimdlp/CPPFImdlp.h>
#include <folding.hpp>
#include <nlohmann/json.hpp>
#define TO_STR2(x) #x
#define TO_STR(x) TO_STR2(x)
#define JSON_VERSION (TO_STR(NLOHMANN_JSON_VERSION_MAJOR) "." TO_STR(NLOHMANN_JSON_VERSION_MINOR))
#include "TestUtils.h"
std::map<std::string, std::string> modules = {
{ "mdlp", "2.0.1" },
{ "Folding", "1.1.1" },
{ "json", "3.12" },
{ "ArffFiles", "1.1.0" }
};
TEST_CASE("MDLP", "[Modules]")
{
auto fimdlp = mdlp::CPPFImdlp();
REQUIRE(fimdlp.version() == modules["mdlp"]);
}
TEST_CASE("Folding", "[Modules]")
{
auto folding = folding::KFold(5, 200);
REQUIRE(folding.version() == modules["Folding"]);
}
TEST_CASE("NLOHMANN_JSON", "[Modules]")
{
REQUIRE(JSON_VERSION == modules["json"]);
}
TEST_CASE("ArffFiles", "[Modules]")
{
auto handler = ArffFiles();
REQUIRE(handler.version() == modules["ArffFiles"]);
}

125
tests/TestUtils.cc Normal file
View File

@@ -0,0 +1,125 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <random>
#include "TestUtils.h"
#include "bayesnet/config.h"
class Paths {
public:
static std::string datasets()
{
return { data_path.begin(), data_path.end() };
}
};
class ShuffleArffFiles : public ArffFiles {
public:
ShuffleArffFiles(int num_samples = 0, bool shuffle = false) : ArffFiles(), num_samples(num_samples), shuffle(shuffle) {}
void load(const std::string& file_name, bool class_last = true)
{
ArffFiles::load(file_name, class_last);
if (num_samples > 0) {
if (num_samples > getY().size()) {
throw std::invalid_argument("num_lines must be less than the number of lines in the file");
}
auto indices = std::vector<int>(num_samples);
std::iota(indices.begin(), indices.end(), 0);
if (shuffle) {
std::mt19937 g{ 173 };
std::shuffle(indices.begin(), indices.end(), g);
}
auto XX = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(num_samples));
auto yy = std::vector<int>(num_samples);
for (int i = 0; i < num_samples; i++) {
yy[i] = getY()[indices[i]];
for (int j = 0; j < attributes.size(); j++) {
XX[j][i] = X[j][indices[i]];
}
}
X = XX;
y = yy;
}
}
private:
int num_samples;
bool shuffle;
};
RawDatasets::RawDatasets(const std::string& file_name, bool discretize_, int num_samples_, bool shuffle_, bool class_last, bool debug)
{
num_samples = num_samples_;
shuffle = shuffle_;
discretize = discretize_;
// Xt can be either discretized or not
// Xv is always discretized
loadDataset(file_name, class_last);
auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1);
dataset = torch::cat({ Xt, yresized }, 0);
nSamples = dataset.size(1);
weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
classNumStates = discretize ? states.at(className).size() : 0;
auto fold = folding::StratifiedKFold(5, yt, 271);
auto [train, test] = fold.getFold(0);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
// Get train and validation sets
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
y_train = dataset.index({ -1, train_t });
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
y_test = dataset.index({ -1, test_t });
if (debug)
std::cout << to_string();
}
map<std::string, int> RawDatasets::discretizeDataset(std::vector<mdlp::samples_t>& X)
{
map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], yv);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xv.push_back(xd);
}
return maxes;
}
void RawDatasets::loadDataset(const std::string& name, bool class_last)
{
auto handler = ShuffleArffFiles(num_samples, shuffle);
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff", class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
yv = handler.getY();
// Get className & Features
className = handler.getClassName();
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
// Discretize Dataset
auto maxValues = discretizeDataset(X);
maxValues[className] = *max_element(yv.begin(), yv.end()) + 1;
if (discretize) {
// discretize the tensor as well
Xt = torch::zeros({ static_cast<int>(Xv.size()), static_cast<int>(Xv[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(maxValues[features[i]]);
iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
Xt.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kInt32));
}
states[className] = std::vector<int>(maxValues[className]);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else {
Xt = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(X[i]));
}
}
yt = torch::tensor(yv, torch::kInt32);
}

72
tests/TestUtils.h Normal file
View File

@@ -0,0 +1,72 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef TEST_UTILS_H
#define TEST_UTILS_H
#include <torch/torch.h>
#include <string>
#include <vector>
#include <map>
#include <tuple>
#include <ArffFiles/ArffFiles.hpp>
#include <fimdlp/CPPFImdlp.h>
#include <folding.hpp>
#include <bayesnet/network/Network.h>
class RawDatasets {
public:
RawDatasets(const std::string& file_name, bool discretize_, int num_samples_ = 0, bool shuffle_ = false, bool class_last = true, bool debug = false);
torch::Tensor Xt, yt, dataset, weights;
torch::Tensor X_train, y_train, X_test, y_test;
std::vector<vector<int>> Xv;
std::vector<int> yv;
std::vector<double> weightsv;
std::vector<string> features;
std::string className;
map<std::string, std::vector<int>> states;
int nSamples, classNumStates;
double epsilon = 1e-5;
bool discretize;
int num_samples = 0;
bool shuffle = false;
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
private:
std::string to_string()
{
std::string features_ = "";
for (auto& f : features) {
features_ += f + " ";
}
std::string states_ = "";
for (auto& s : states) {
states_ += s.first + " ";
for (auto& v : s.second) {
states_ += std::to_string(v) + " ";
}
states_ += "\n";
}
return "Xt dimensions: " + std::to_string(Xt.size(0)) + " " + std::to_string(Xt.size(1)) + "\n"
"Xv dimensions: " + std::to_string(Xv.size()) + " " + std::to_string(Xv[0].size()) + "\n"
+ "yt dimensions: " + std::to_string(yt.size(0)) + "\n"
+ "yv dimensions: " + std::to_string(yv.size()) + "\n"
+ "X_train dimensions: " + std::to_string(X_train.size(0)) + " " + std::to_string(X_train.size(1)) + "\n"
+ "X_test dimensions: " + std::to_string(X_test.size(0)) + " " + std::to_string(X_test.size(1)) + "\n"
+ "y_train dimensions: " + std::to_string(y_train.size(0)) + "\n"
+ "y_test dimensions: " + std::to_string(y_test.size(0)) + "\n"
+ "features: " + std::to_string(features.size()) + "\n"
+ features_ + "\n"
+ "className: " + className + "\n"
+ "states: " + std::to_string(states.size()) + "\n"
+ "nSamples: " + std::to_string(nSamples) + "\n"
+ "classNumStates: " + std::to_string(classNumStates) + "\n"
+ "states: " + states_ + "\n";
}
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X);
void loadDataset(const std::string& name, bool class_last);
};
#endif //TEST_UTILS_H

237
tests/TestXBA2DE.cc Normal file
View File

@@ -0,0 +1,237 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_approx.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "TestUtils.h"
#include "bayesnet/ensembles/XBA2DE.h"
TEST_CASE("Normal test", "[XBA2DE]") {
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XBA2DE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getVersion() == "0.9.7");
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 13 models eliminated");
REQUIRE(clf.getNotes()[1] == "Number of models: 1");
REQUIRE(clf.getNumberOfStates() == 64);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(1.0f));
REQUIRE(clf.graph().size() == 1);
}
TEST_CASE("Feature_select CFS", "[XBA2DE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::XBA2DE();
clf.setHyperparameters({{"select_features", "CFS"}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 220);
REQUIRE(clf.getNumberOfEdges() == 506);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 22");
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
}
TEST_CASE("Feature_select IWSS", "[XBA2DE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::XBA2DE();
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 220);
REQUIRE(clf.getNumberOfEdges() == 506);
REQUIRE(clf.getNotes().size() == 4);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
REQUIRE(clf.getNotes()[3] == "Number of models: 22");
REQUIRE(clf.getNumberOfStates() == 5346);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.72093));
}
TEST_CASE("Feature_select FCBF", "[XBA2DE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::XBA2DE();
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 290);
REQUIRE(clf.getNumberOfEdges() == 667);
REQUIRE(clf.getNumberOfStates() == 7047);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 2");
REQUIRE(clf.getNotes()[2] == "Number of models: 29");
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.744186));
}
TEST_CASE("Test used features in train note and score", "[XBA2DE]") {
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::XBA2DE();
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features", "CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 144);
REQUIRE(clf.getNumberOfEdges() == 320);
REQUIRE(clf.getNumberOfStates() == 5504);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 16");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.850260437f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.850260437f).epsilon(raw.epsilon));
}
TEST_CASE("Order asc, desc & random", "[XBA2DE]") {
auto raw = RawDatasets("glass", true);
std::map<std::string, double> scores{{"asc", 0.827103}, {"desc", 0.808411}, {"rand", 0.827103}};
for (const std::string &order : {"asc", "desc", "rand"}) {
auto clf = bayesnet::XBA2DE();
clf.setHyperparameters({
{"order", order},
{"bisection", false},
{"maxTolerance", 1},
{"convergence", true},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("XBA2DE order: " << order);
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
}
}
TEST_CASE("Oddities", "[XBA2DE]") {
auto clf = bayesnet::XBA2DE();
auto raw = RawDatasets("iris", true);
auto bad_hyper = nlohmann::json{
{{"order", "duck"}},
{{"select_features", "duck"}},
{{"maxTolerance", 0}},
{{"maxTolerance", 7}},
};
for (const auto &hyper : bad_hyper.items()) {
INFO("XBA2DE hyper: " << hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument);
auto bad_hyper_fit = nlohmann::json{
{{"select_features", "IWSS"}, {"threshold", -0.01}},
{{"select_features", "IWSS"}, {"threshold", 0.51}},
{{"select_features", "FCBF"}, {"threshold", 1e-8}},
{{"select_features", "FCBF"}, {"threshold", 1.01}},
};
for (const auto &hyper : bad_hyper_fit.items()) {
INFO("XBA2DE hyper: " << hyper.value().dump());
clf.setHyperparameters(hyper.value());
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
std::invalid_argument);
}
auto bad_hyper_fit2 = nlohmann::json{
{{"alpha_block", true}, {"block_update", true}},
{{"bisection", false}, {"block_update", true}},
};
for (const auto &hyper : bad_hyper_fit2.items()) {
INFO("XBA2DE hyper: " << hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
// Check not enough selected features
raw.Xv.pop_back();
raw.Xv.pop_back();
raw.Xv.pop_back();
raw.features.pop_back();
raw.features.pop_back();
raw.features.pop_back();
clf.setHyperparameters({{"select_features", "CFS"}, {"alpha_block", false}, {"block_update", false}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNotes().size() == 1);
REQUIRE(clf.getNotes()[0] == "No features selected in initialization");
}
TEST_CASE("Bisection Best", "[XBA2DE]") {
auto clf = bayesnet::XBA2DE();
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
clf.setHyperparameters({
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"convergence_best", false},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 330);
REQUIRE(clf.getNumberOfEdges() == 836);
REQUIRE(clf.getNumberOfStates() == 31108);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes().at(0) == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes().at(1) == "Pairs not used in train: 83");
REQUIRE(clf.getNotes().at(2) == "Number of models: 22");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(0.975).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.975).epsilon(raw.epsilon));
}
TEST_CASE("Bisection Best vs Last", "[XBA2DE]") {
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
auto clf = bayesnet::XBA2DE();
auto hyperparameters = nlohmann::json{
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"convergence_best", true},
};
clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_best = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_best == Catch::Approx(0.983333).epsilon(raw.epsilon));
// Now we will set the hyperparameter to use the last accuracy
hyperparameters["convergence_best"] = false;
clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_last = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_last == Catch::Approx(0.99).epsilon(raw.epsilon));
}
TEST_CASE("Block Update", "[XBA2DE]") {
auto clf = bayesnet::XBA2DE();
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
clf.setHyperparameters({
{"bisection", true},
{"block_update", true},
{"maxTolerance", 3},
{"convergence", true},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 120);
REQUIRE(clf.getNumberOfEdges() == 304);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 83");
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(0.963333).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.963333).epsilon(raw.epsilon));
/*std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;*/
/*std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;*/
/*std::cout << "Notes size " << clf.getNotes().size() << std::endl;*/
/*for (auto note : clf.getNotes()) {*/
/* std::cout << note << std::endl;*/
/*}*/
/*std::cout << "Score " << score << std::endl;*/
}
TEST_CASE("Alphablock", "[XBA2DE]") {
auto clf_alpha = bayesnet::XBA2DE();
auto clf_no_alpha = bayesnet::XBA2DE();
auto raw = RawDatasets("diabetes", true);
clf_alpha.setHyperparameters({
{"alpha_block", true},
});
clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);
auto score_no_alpha = clf_no_alpha.score(raw.X_test, raw.y_test);
REQUIRE(score_alpha == Catch::Approx(0.714286).epsilon(raw.epsilon));
REQUIRE(score_no_alpha == Catch::Approx(0.714286).epsilon(raw.epsilon));
}

216
tests/TestXBAODE.cc Normal file
View File

@@ -0,0 +1,216 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_approx.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "TestUtils.h"
#include "bayesnet/ensembles/XBAODE.h"
TEST_CASE("Normal test", "[XBAODE]") {
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XBAODE();
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 20);
REQUIRE(clf.getNumberOfEdges() == 36);
REQUIRE(clf.getNotes().size() == 1);
REQUIRE(clf.getVersion() == "0.9.7");
REQUIRE(clf.getNotes()[0] == "Number of models: 4");
REQUIRE(clf.getNumberOfStates() == 256);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.933333));
}
TEST_CASE("Feature_select CFS", "[XBAODE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::XBAODE();
clf.setHyperparameters({{"select_features", "CFS"}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 171);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
}
TEST_CASE("Feature_select IWSS", "[XBAODE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::XBAODE();
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 171);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.697674394));
}
TEST_CASE("Feature_select FCBF", "[XBAODE]") {
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::XBAODE();
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 171);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
}
TEST_CASE("Test used features in train note and score", "[XBAODE]") {
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::XBAODE();
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features", "CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 136);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.819010437f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.819010437f).epsilon(raw.epsilon));
}
TEST_CASE("Order asc, desc & random", "[XBAODE]") {
auto raw = RawDatasets("glass", true);
std::map<std::string, double> scores{{"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112}};
for (const std::string &order : {"asc", "desc", "rand"}) {
auto clf = bayesnet::XBAODE();
clf.setHyperparameters({
{"order", order},
{"bisection", false},
{"maxTolerance", 1},
{"convergence", false},
});
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("XBAODE order: " << order);
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
}
}
TEST_CASE("Oddities", "[XBAODE]") {
auto clf = bayesnet::XBAODE();
auto raw = RawDatasets("iris", true);
auto bad_hyper = nlohmann::json{
{{"order", "duck"}},
{{"select_features", "duck"}},
{{"maxTolerance", 0}},
{{"maxTolerance", 7}},
};
for (const auto &hyper : bad_hyper.items()) {
INFO("XBAODE hyper: " << hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument);
auto bad_hyper_fit = nlohmann::json{
{{"select_features", "IWSS"}, {"threshold", -0.01}},
{{"select_features", "IWSS"}, {"threshold", 0.51}},
{{"select_features", "FCBF"}, {"threshold", 1e-8}},
{{"select_features", "FCBF"}, {"threshold", 1.01}},
};
for (const auto &hyper : bad_hyper_fit.items()) {
INFO("XBAODE hyper: " << hyper.value().dump());
clf.setHyperparameters(hyper.value());
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
std::invalid_argument);
}
auto bad_hyper_fit2 = nlohmann::json{
{{"alpha_block", true}, {"block_update", true}},
{{"bisection", false}, {"block_update", true}},
};
for (const auto &hyper : bad_hyper_fit2.items()) {
INFO("XBAODE hyper: " << hyper.value().dump());
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
}
}
TEST_CASE("Bisection Best", "[XBAODE]") {
auto clf = bayesnet::XBAODE();
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
clf.setHyperparameters({
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"convergence_best", false},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 210);
REQUIRE(clf.getNumberOfEdges() == 406);
REQUIRE(clf.getNotes().size() == 1);
REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
}
TEST_CASE("Bisection Best vs Last", "[XBAODE]") {
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
auto clf = bayesnet::XBAODE();
auto hyperparameters = nlohmann::json{
{"bisection", true},
{"maxTolerance", 3},
{"convergence", true},
{"convergence_best", true},
};
clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_best = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_best == Catch::Approx(0.973333359f).epsilon(raw.epsilon));
// Now we will set the hyperparameter to use the last accuracy
hyperparameters["convergence_best"] = false;
clf.setHyperparameters(hyperparameters);
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_last = clf.score(raw.X_test, raw.y_test);
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
}
TEST_CASE("Block Update", "[XBAODE]") {
auto clf = bayesnet::XBAODE();
auto raw = RawDatasets("mfeat-factors", true, 500);
clf.setHyperparameters({
{"bisection", true},
{"block_update", true},
{"maxTolerance", 3},
{"convergence", true},
});
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 1085);
REQUIRE(clf.getNumberOfEdges() == 2165);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
REQUIRE(clf.getNotes()[1] == "Used features in train: 20 of 216");
REQUIRE(clf.getNotes()[2] == "Number of models: 5");
auto score = clf.score(raw.X_test, raw.y_test);
auto scoret = clf.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
//
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
// for (auto note : clf.getNotes()) {
// std::cout << note << std::endl;
// }
// std::cout << "Score " << score << std::endl;
}
TEST_CASE("Alphablock", "[XBAODE]") {
auto clf_alpha = bayesnet::XBAODE();
auto clf_no_alpha = bayesnet::XBAODE();
auto raw = RawDatasets("diabetes", true);
clf_alpha.setHyperparameters({
{"alpha_block", true},
});
clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);
auto score_no_alpha = clf_no_alpha.score(raw.X_test, raw.y_test);
REQUIRE(score_alpha == Catch::Approx(0.720779f).epsilon(raw.epsilon));
REQUIRE(score_no_alpha == Catch::Approx(0.733766f).epsilon(raw.epsilon));
}

126
tests/TestXSPODE.cc Normal file
View File

@@ -0,0 +1,126 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <stdexcept>
#include "bayesnet/classifiers/XSPODE.h"
#include "TestUtils.h"
TEST_CASE("fit vector test", "[XSPODE]") {
auto raw = RawDatasets("iris", true);
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
for (int i = 0; i < 4; ++i) {
auto clf = bayesnet::XSpode(i);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 9);
REQUIRE(clf.getNotes().size() == 0);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
}
}
TEST_CASE("fit dataset test", "[XSPODE]") {
auto raw = RawDatasets("iris", true);
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
for (int i = 0; i < 4; ++i) {
auto clf = bayesnet::XSpode(i);
clf.fit(raw.dataset, raw.features, raw.className, raw.states,
raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 9);
REQUIRE(clf.getNotes().size() == 0);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
}
}
TEST_CASE("tensors dataset predict & predict_proba", "[XSPODE]") {
auto raw = RawDatasets("iris", true);
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
auto probs_expected = std::vector<std::vector<float>>({
{0.999017, 0.000306908, 0.000676449},
{0.99831, 0.00119304, 0.000497099},
{0.998432, 0.00078416, 0.00078416},
{0.998801, 0.000599438, 0.000599438}
});
for (int i = 0; i < 4; ++i) {
auto clf = bayesnet::XSpode(i);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states,
raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 9);
REQUIRE(clf.getNotes().size() == 0);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
// Get the first 4 lines of X_test to do predict_proba
auto X_reduced = raw.X_test.slice(1, 0, 4);
auto proba = clf.predict_proba(X_reduced);
for (int p = 0; p < 3; ++p) {
REQUIRE(proba[0][p].item<double>() == Catch::Approx(probs_expected.at(i).at(p)));
}
}
}
TEST_CASE("mfeat-factors dataset test", "[XSPODE]") {
auto raw = RawDatasets("mfeat-factors", true);
auto scores = std::vector<float>({0.9825, 0.9775, 0.9775, 0.99});
for (int i = 0; i < 4; ++i) {
auto clf = bayesnet::XSpode(i);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 217);
REQUIRE(clf.getNumberOfEdges() == 433);
REQUIRE(clf.getNotes().size() == 0);
REQUIRE(clf.getNumberOfStates() == 652320);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
}
}
TEST_CASE("Laplace predict", "[XSPODE]") {
auto raw = RawDatasets("iris", true);
auto scores = std::vector<float>({0.966666639, 1.0f, 0.933333337, 1.0f});
for (int i = 0; i < 4; ++i) {
auto clf = bayesnet::XSpode(0);
clf.setHyperparameters({ {"parent", i} });
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::LAPLACE);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 9);
REQUIRE(clf.getNotes().size() == 0);
REQUIRE(clf.getNumberOfStates() == 64);
REQUIRE(clf.getNFeatures() == 4);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
}
}
TEST_CASE("Not fitted model predict", "[XSPODE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XSpode(0);
REQUIRE_THROWS_AS(clf.predict(std::vector<int>({1,2,3})), std::logic_error);
}
TEST_CASE("Test instance predict", "[XSPODE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XSpode(0);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::ORIGINAL);
REQUIRE(clf.predict(std::vector<int>({1,2,3,4})) == 1);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f));
// Cestnik is not defined in the classifier so it should imply alpha_ = 0
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::CESTNIK);
REQUIRE(clf.predict(std::vector<int>({1,2,3,4})) == 0);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f));
}
TEST_CASE("Test to_string and fitx", "[XSPODE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XSpode(0);
auto weights = torch::full({raw.Xt.size(1)}, 1.0 / raw.Xt.size(1), torch::kFloat64);
clf.fitx(raw.Xt, raw.yt, weights, bayesnet::Smoothing_t::ORIGINAL);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 9);
REQUIRE(clf.getNotes().size() == 0);
REQUIRE(clf.getNumberOfStates() == 64);
REQUIRE(clf.getNFeatures() == 4);
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.966666639f));
REQUIRE(clf.to_string().size() == 1966);
REQUIRE(clf.graph("Not yet implemented") == std::vector<std::string>({"Not yet implemented"}));
}

141
tests/TestXSPnDE.cc Normal file
View File

@@ -0,0 +1,141 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "bayesnet/classifiers/XSP2DE.h" // <-- your new 2-superparent classifier
#include "TestUtils.h" // for RawDatasets, etc.
// Helper function to handle each (sp1, sp2) pair in tests
static void check_spnde_pair(
int sp1,
int sp2,
RawDatasets &raw,
bool fitVector,
bool fitTensor)
{
// Create our classifier
bayesnet::XSp2de clf(sp1, sp2);
// Option A: fit with vector-based data
if (fitVector) {
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
}
// Option B: fit with the whole dataset in torch::Tensor form
else if (fitTensor) {
// your “tensor” version of fit
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
}
// Option C: or you might do the “dataset” version:
else {
clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
}
// Basic checks
REQUIRE(clf.getNumberOfNodes() == 5); // for iris: 4 features + 1 class
REQUIRE(clf.getNumberOfEdges() == 8);
REQUIRE(clf.getNotes().size() == 0);
// Evaluate on test set
float sc = clf.score(raw.X_test, raw.y_test);
REQUIRE(sc >= 0.93f);
}
// ------------------------------------------------------------
// 1) Fit vector test
// ------------------------------------------------------------
TEST_CASE("fit vector test (XSP2DE)", "[XSP2DE]") {
auto raw = RawDatasets("iris", true);
std::vector<std::pair<int,int>> parentPairs = {
{0,1}, {2,3}
};
for (auto &p : parentPairs) {
check_spnde_pair(p.first, p.second, raw, /*fitVector=*/true, /*fitTensor=*/false);
}
}
// ------------------------------------------------------------
// 2) Fit dataset test
// ------------------------------------------------------------
TEST_CASE("fit dataset test (XSP2DE)", "[XSP2DE]") {
auto raw = RawDatasets("iris", true);
// Again test multiple pairs:
std::vector<std::pair<int,int>> parentPairs = {
{0,2}, {1,3}
};
for (auto &p : parentPairs) {
check_spnde_pair(p.first, p.second, raw, /*fitVector=*/false, /*fitTensor=*/false);
}
}
// ------------------------------------------------------------
// 3) Tensors dataset predict & predict_proba
// ------------------------------------------------------------
TEST_CASE("tensors dataset predict & predict_proba (XSP2DE)", "[XSP2DE]") {
auto raw = RawDatasets("iris", true);
std::vector<std::pair<int,int>> parentPairs = {
{0,3}, {1,2}
};
for (auto &p : parentPairs) {
bayesnet::XSp2de clf(p.first, p.second);
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
REQUIRE(clf.getNotes().size() == 0);
// Check the score
float sc = clf.score(raw.X_test, raw.y_test);
REQUIRE(sc >= 0.90f);
auto X_reduced = raw.X_test.slice(1, 0, 3);
auto proba = clf.predict_proba(X_reduced);
}
}
TEST_CASE("Check hyperparameters", "[XSP2DE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XSp2de(0, 1);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
auto clf2 = bayesnet::XSp2de(2, 3);
clf2.setHyperparameters({{"parent1", 0}, {"parent2", 1}});
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.to_string() == clf2.to_string());
}
TEST_CASE("Check different smoothing", "[XSP2DE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XSp2de(0, 1);
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::ORIGINAL);
auto clf2 = bayesnet::XSp2de(0, 1);
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::LAPLACE);
auto clf3 = bayesnet::XSp2de(0, 1);
clf3.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::NONE);
auto score = clf.score(raw.X_test, raw.y_test);
auto score2 = clf2.score(raw.X_test, raw.y_test);
auto score3 = clf3.score(raw.X_test, raw.y_test);
REQUIRE(score == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(score2 == Catch::Approx(0.7333333).epsilon(raw.epsilon));
REQUIRE(score3 == Catch::Approx(0.966667).epsilon(raw.epsilon));
}
TEST_CASE("Check rest", "[XSP2DE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::XSp2de(0, 1);
REQUIRE_THROWS_AS(clf.predict_proba(std::vector<int>({1,2,3,4})), std::logic_error);
clf.fitx(raw.Xt, raw.yt, raw.weights, bayesnet::Smoothing_t::ORIGINAL);
REQUIRE(clf.getNFeatures() == 4);
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f).epsilon(raw.epsilon));
REQUIRE(clf.predict({1,2,3,4}) == 1);
}

41
tests/Timer.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include <chrono>
#include <string>
#include <sstream>
namespace platform {
class Timer {
private:
std::chrono::high_resolution_clock::time_point begin;
std::chrono::high_resolution_clock::time_point end;
public:
Timer() = default;
~Timer() = default;
void start() { begin = std::chrono::high_resolution_clock::now(); }
void stop() { end = std::chrono::high_resolution_clock::now(); }
double getDuration()
{
stop();
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (end - begin);
return time_span.count();
}
double getLapse()
{
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (std::chrono::high_resolution_clock::now() - begin);
return time_span.count();
}
std::string getDurationString(bool lapse = false)
{
double duration = lapse ? getLapse() : getDuration();
return translate2String(duration);
}
std::string translate2String(double duration)
{
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
std::stringstream ss;
ss << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit;
return ss.str();
}
};
} /* namespace platform */

863
tests/data/diabetes.arff Executable file
View File

@@ -0,0 +1,863 @@
% 1. Title: Pima Indians Diabetes Database
%
% 2. Sources:
% (a) Original owners: National Institute of Diabetes and Digestive and
% Kidney Diseases
% (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)
% Research Center, RMI Group Leader
% Applied Physics Laboratory
% The Johns Hopkins University
% Johns Hopkins Road
% Laurel, MD 20707
% (301) 953-6231
% (c) Date received: 9 May 1990
%
% 3. Past Usage:
% 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \&
% Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast
% the onset of diabetes mellitus. In {\it Proceedings of the Symposium
% on Computer Applications and Medical Care} (pp. 261--265). IEEE
% Computer Society Press.
%
% The diagnostic, binary-valued variable investigated is whether the
% patient shows signs of diabetes according to World Health Organization
% criteria (i.e., if the 2 hour post-load plasma glucose was at least
% 200 mg/dl at any survey examination or if found during routine medical
% care). The population lives near Phoenix, Arizona, USA.
%
% Results: Their ADAP algorithm makes a real-valued prediction between
% 0 and 1. This was transformed into a binary decision using a cutoff of
% 0.448. Using 576 training instances, the sensitivity and specificity
% of their algorithm was 76% on the remaining 192 instances.
%
% 4. Relevant Information:
% Several constraints were placed on the selection of these instances from
% a larger database. In particular, all patients here are females at
% least 21 years old of Pima Indian heritage. ADAP is an adaptive learning
% routine that generates and executes digital analogs of perceptron-like
% devices. It is a unique algorithm; see the paper for details.
%
% 5. Number of Instances: 768
%
% 6. Number of Attributes: 8 plus class
%
% 7. For Each Attribute: (all numeric-valued)
% 1. Number of times pregnant
% 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test
% 3. Diastolic blood pressure (mm Hg)
% 4. Triceps skin fold thickness (mm)
% 5. 2-Hour serum insulin (mu U/ml)
% 6. Body mass index (weight in kg/(height in m)^2)
% 7. Diabetes pedigree function
% 8. Age (years)
% 9. Class variable (0 or 1)
%
% 8. Missing Attribute Values: None
%
% 9. Class Distribution: (class value 1 is interpreted as "tested positive for
% diabetes")
%
% Class Value Number of instances
% 0 500
% 1 268
%
% 10. Brief statistical analysis:
%
% Attribute number: Mean: Standard Deviation:
% 1. 3.8 3.4
% 2. 120.9 32.0
% 3. 69.1 19.4
% 4. 20.5 16.0
% 5. 79.8 115.2
% 6. 32.0 7.9
% 7. 0.5 0.3
% 8. 33.2 11.8
%
%
%
%
%
%
% Relabeled values in attribute 'class'
% From: 0 To: tested_negative
% From: 1 To: tested_positive
%
@relation pima_diabetes
@attribute 'preg' real
@attribute 'plas' real
@attribute 'pres' real
@attribute 'skin' real
@attribute 'insu' real
@attribute 'mass' real
@attribute 'pedi' real
@attribute 'age' real
@attribute 'class' { tested_negative, tested_positive}
@data
6,148,72,35,0,33.6,0.627,50,tested_positive
1,85,66,29,0,26.6,0.351,31,tested_negative
8,183,64,0,0,23.3,0.672,32,tested_positive
1,89,66,23,94,28.1,0.167,21,tested_negative
0,137,40,35,168,43.1,2.288,33,tested_positive
5,116,74,0,0,25.6,0.201,30,tested_negative
3,78,50,32,88,31,0.248,26,tested_positive
10,115,0,0,0,35.3,0.134,29,tested_negative
2,197,70,45,543,30.5,0.158,53,tested_positive
8,125,96,0,0,0,0.232,54,tested_positive
4,110,92,0,0,37.6,0.191,30,tested_negative
10,168,74,0,0,38,0.537,34,tested_positive
10,139,80,0,0,27.1,1.441,57,tested_negative
1,189,60,23,846,30.1,0.398,59,tested_positive
5,166,72,19,175,25.8,0.587,51,tested_positive
7,100,0,0,0,30,0.484,32,tested_positive
0,118,84,47,230,45.8,0.551,31,tested_positive
7,107,74,0,0,29.6,0.254,31,tested_positive
1,103,30,38,83,43.3,0.183,33,tested_negative
1,115,70,30,96,34.6,0.529,32,tested_positive
3,126,88,41,235,39.3,0.704,27,tested_negative
8,99,84,0,0,35.4,0.388,50,tested_negative
7,196,90,0,0,39.8,0.451,41,tested_positive
9,119,80,35,0,29,0.263,29,tested_positive
11,143,94,33,146,36.6,0.254,51,tested_positive
10,125,70,26,115,31.1,0.205,41,tested_positive
7,147,76,0,0,39.4,0.257,43,tested_positive
1,97,66,15,140,23.2,0.487,22,tested_negative
13,145,82,19,110,22.2,0.245,57,tested_negative
5,117,92,0,0,34.1,0.337,38,tested_negative
5,109,75,26,0,36,0.546,60,tested_negative
3,158,76,36,245,31.6,0.851,28,tested_positive
3,88,58,11,54,24.8,0.267,22,tested_negative
6,92,92,0,0,19.9,0.188,28,tested_negative
10,122,78,31,0,27.6,0.512,45,tested_negative
4,103,60,33,192,24,0.966,33,tested_negative
11,138,76,0,0,33.2,0.42,35,tested_negative
9,102,76,37,0,32.9,0.665,46,tested_positive
2,90,68,42,0,38.2,0.503,27,tested_positive
4,111,72,47,207,37.1,1.39,56,tested_positive
3,180,64,25,70,34,0.271,26,tested_negative
7,133,84,0,0,40.2,0.696,37,tested_negative
7,106,92,18,0,22.7,0.235,48,tested_negative
9,171,110,24,240,45.4,0.721,54,tested_positive
7,159,64,0,0,27.4,0.294,40,tested_negative
0,180,66,39,0,42,1.893,25,tested_positive
1,146,56,0,0,29.7,0.564,29,tested_negative
2,71,70,27,0,28,0.586,22,tested_negative
7,103,66,32,0,39.1,0.344,31,tested_positive
7,105,0,0,0,0,0.305,24,tested_negative
1,103,80,11,82,19.4,0.491,22,tested_negative
1,101,50,15,36,24.2,0.526,26,tested_negative
5,88,66,21,23,24.4,0.342,30,tested_negative
8,176,90,34,300,33.7,0.467,58,tested_positive
7,150,66,42,342,34.7,0.718,42,tested_negative
1,73,50,10,0,23,0.248,21,tested_negative
7,187,68,39,304,37.7,0.254,41,tested_positive
0,100,88,60,110,46.8,0.962,31,tested_negative
0,146,82,0,0,40.5,1.781,44,tested_negative
0,105,64,41,142,41.5,0.173,22,tested_negative
2,84,0,0,0,0,0.304,21,tested_negative
8,133,72,0,0,32.9,0.27,39,tested_positive
5,44,62,0,0,25,0.587,36,tested_negative
2,141,58,34,128,25.4,0.699,24,tested_negative
7,114,66,0,0,32.8,0.258,42,tested_positive
5,99,74,27,0,29,0.203,32,tested_negative
0,109,88,30,0,32.5,0.855,38,tested_positive
2,109,92,0,0,42.7,0.845,54,tested_negative
1,95,66,13,38,19.6,0.334,25,tested_negative
4,146,85,27,100,28.9,0.189,27,tested_negative
2,100,66,20,90,32.9,0.867,28,tested_positive
5,139,64,35,140,28.6,0.411,26,tested_negative
13,126,90,0,0,43.4,0.583,42,tested_positive
4,129,86,20,270,35.1,0.231,23,tested_negative
1,79,75,30,0,32,0.396,22,tested_negative
1,0,48,20,0,24.7,0.14,22,tested_negative
7,62,78,0,0,32.6,0.391,41,tested_negative
5,95,72,33,0,37.7,0.37,27,tested_negative
0,131,0,0,0,43.2,0.27,26,tested_positive
2,112,66,22,0,25,0.307,24,tested_negative
3,113,44,13,0,22.4,0.14,22,tested_negative
2,74,0,0,0,0,0.102,22,tested_negative
7,83,78,26,71,29.3,0.767,36,tested_negative
0,101,65,28,0,24.6,0.237,22,tested_negative
5,137,108,0,0,48.8,0.227,37,tested_positive
2,110,74,29,125,32.4,0.698,27,tested_negative
13,106,72,54,0,36.6,0.178,45,tested_negative
2,100,68,25,71,38.5,0.324,26,tested_negative
15,136,70,32,110,37.1,0.153,43,tested_positive
1,107,68,19,0,26.5,0.165,24,tested_negative
1,80,55,0,0,19.1,0.258,21,tested_negative
4,123,80,15,176,32,0.443,34,tested_negative
7,81,78,40,48,46.7,0.261,42,tested_negative
4,134,72,0,0,23.8,0.277,60,tested_positive
2,142,82,18,64,24.7,0.761,21,tested_negative
6,144,72,27,228,33.9,0.255,40,tested_negative
2,92,62,28,0,31.6,0.13,24,tested_negative
1,71,48,18,76,20.4,0.323,22,tested_negative
6,93,50,30,64,28.7,0.356,23,tested_negative
1,122,90,51,220,49.7,0.325,31,tested_positive
1,163,72,0,0,39,1.222,33,tested_positive
1,151,60,0,0,26.1,0.179,22,tested_negative
0,125,96,0,0,22.5,0.262,21,tested_negative
1,81,72,18,40,26.6,0.283,24,tested_negative
2,85,65,0,0,39.6,0.93,27,tested_negative
1,126,56,29,152,28.7,0.801,21,tested_negative
1,96,122,0,0,22.4,0.207,27,tested_negative
4,144,58,28,140,29.5,0.287,37,tested_negative
3,83,58,31,18,34.3,0.336,25,tested_negative
0,95,85,25,36,37.4,0.247,24,tested_positive
3,171,72,33,135,33.3,0.199,24,tested_positive
8,155,62,26,495,34,0.543,46,tested_positive
1,89,76,34,37,31.2,0.192,23,tested_negative
4,76,62,0,0,34,0.391,25,tested_negative
7,160,54,32,175,30.5,0.588,39,tested_positive
4,146,92,0,0,31.2,0.539,61,tested_positive
5,124,74,0,0,34,0.22,38,tested_positive
5,78,48,0,0,33.7,0.654,25,tested_negative
4,97,60,23,0,28.2,0.443,22,tested_negative
4,99,76,15,51,23.2,0.223,21,tested_negative
0,162,76,56,100,53.2,0.759,25,tested_positive
6,111,64,39,0,34.2,0.26,24,tested_negative
2,107,74,30,100,33.6,0.404,23,tested_negative
5,132,80,0,0,26.8,0.186,69,tested_negative
0,113,76,0,0,33.3,0.278,23,tested_positive
1,88,30,42,99,55,0.496,26,tested_positive
3,120,70,30,135,42.9,0.452,30,tested_negative
1,118,58,36,94,33.3,0.261,23,tested_negative
1,117,88,24,145,34.5,0.403,40,tested_positive
0,105,84,0,0,27.9,0.741,62,tested_positive
4,173,70,14,168,29.7,0.361,33,tested_positive
9,122,56,0,0,33.3,1.114,33,tested_positive
3,170,64,37,225,34.5,0.356,30,tested_positive
8,84,74,31,0,38.3,0.457,39,tested_negative
2,96,68,13,49,21.1,0.647,26,tested_negative
2,125,60,20,140,33.8,0.088,31,tested_negative
0,100,70,26,50,30.8,0.597,21,tested_negative
0,93,60,25,92,28.7,0.532,22,tested_negative
0,129,80,0,0,31.2,0.703,29,tested_negative
5,105,72,29,325,36.9,0.159,28,tested_negative
3,128,78,0,0,21.1,0.268,55,tested_negative
5,106,82,30,0,39.5,0.286,38,tested_negative
2,108,52,26,63,32.5,0.318,22,tested_negative
10,108,66,0,0,32.4,0.272,42,tested_positive
4,154,62,31,284,32.8,0.237,23,tested_negative
0,102,75,23,0,0,0.572,21,tested_negative
9,57,80,37,0,32.8,0.096,41,tested_negative
2,106,64,35,119,30.5,1.4,34,tested_negative
5,147,78,0,0,33.7,0.218,65,tested_negative
2,90,70,17,0,27.3,0.085,22,tested_negative
1,136,74,50,204,37.4,0.399,24,tested_negative
4,114,65,0,0,21.9,0.432,37,tested_negative
9,156,86,28,155,34.3,1.189,42,tested_positive
1,153,82,42,485,40.6,0.687,23,tested_negative
8,188,78,0,0,47.9,0.137,43,tested_positive
7,152,88,44,0,50,0.337,36,tested_positive
2,99,52,15,94,24.6,0.637,21,tested_negative
1,109,56,21,135,25.2,0.833,23,tested_negative
2,88,74,19,53,29,0.229,22,tested_negative
17,163,72,41,114,40.9,0.817,47,tested_positive
4,151,90,38,0,29.7,0.294,36,tested_negative
7,102,74,40,105,37.2,0.204,45,tested_negative
0,114,80,34,285,44.2,0.167,27,tested_negative
2,100,64,23,0,29.7,0.368,21,tested_negative
0,131,88,0,0,31.6,0.743,32,tested_positive
6,104,74,18,156,29.9,0.722,41,tested_positive
3,148,66,25,0,32.5,0.256,22,tested_negative
4,120,68,0,0,29.6,0.709,34,tested_negative
4,110,66,0,0,31.9,0.471,29,tested_negative
3,111,90,12,78,28.4,0.495,29,tested_negative
6,102,82,0,0,30.8,0.18,36,tested_positive
6,134,70,23,130,35.4,0.542,29,tested_positive
2,87,0,23,0,28.9,0.773,25,tested_negative
1,79,60,42,48,43.5,0.678,23,tested_negative
2,75,64,24,55,29.7,0.37,33,tested_negative
8,179,72,42,130,32.7,0.719,36,tested_positive
6,85,78,0,0,31.2,0.382,42,tested_negative
0,129,110,46,130,67.1,0.319,26,tested_positive
5,143,78,0,0,45,0.19,47,tested_negative
5,130,82,0,0,39.1,0.956,37,tested_positive
6,87,80,0,0,23.2,0.084,32,tested_negative
0,119,64,18,92,34.9,0.725,23,tested_negative
1,0,74,20,23,27.7,0.299,21,tested_negative
5,73,60,0,0,26.8,0.268,27,tested_negative
4,141,74,0,0,27.6,0.244,40,tested_negative
7,194,68,28,0,35.9,0.745,41,tested_positive
8,181,68,36,495,30.1,0.615,60,tested_positive
1,128,98,41,58,32,1.321,33,tested_positive
8,109,76,39,114,27.9,0.64,31,tested_positive
5,139,80,35,160,31.6,0.361,25,tested_positive
3,111,62,0,0,22.6,0.142,21,tested_negative
9,123,70,44,94,33.1,0.374,40,tested_negative
7,159,66,0,0,30.4,0.383,36,tested_positive
11,135,0,0,0,52.3,0.578,40,tested_positive
8,85,55,20,0,24.4,0.136,42,tested_negative
5,158,84,41,210,39.4,0.395,29,tested_positive
1,105,58,0,0,24.3,0.187,21,tested_negative
3,107,62,13,48,22.9,0.678,23,tested_positive
4,109,64,44,99,34.8,0.905,26,tested_positive
4,148,60,27,318,30.9,0.15,29,tested_positive
0,113,80,16,0,31,0.874,21,tested_negative
1,138,82,0,0,40.1,0.236,28,tested_negative
0,108,68,20,0,27.3,0.787,32,tested_negative
2,99,70,16,44,20.4,0.235,27,tested_negative
6,103,72,32,190,37.7,0.324,55,tested_negative
5,111,72,28,0,23.9,0.407,27,tested_negative
8,196,76,29,280,37.5,0.605,57,tested_positive
5,162,104,0,0,37.7,0.151,52,tested_positive
1,96,64,27,87,33.2,0.289,21,tested_negative
7,184,84,33,0,35.5,0.355,41,tested_positive
2,81,60,22,0,27.7,0.29,25,tested_negative
0,147,85,54,0,42.8,0.375,24,tested_negative
7,179,95,31,0,34.2,0.164,60,tested_negative
0,140,65,26,130,42.6,0.431,24,tested_positive
9,112,82,32,175,34.2,0.26,36,tested_positive
12,151,70,40,271,41.8,0.742,38,tested_positive
5,109,62,41,129,35.8,0.514,25,tested_positive
6,125,68,30,120,30,0.464,32,tested_negative
5,85,74,22,0,29,1.224,32,tested_positive
5,112,66,0,0,37.8,0.261,41,tested_positive
0,177,60,29,478,34.6,1.072,21,tested_positive
2,158,90,0,0,31.6,0.805,66,tested_positive
7,119,0,0,0,25.2,0.209,37,tested_negative
7,142,60,33,190,28.8,0.687,61,tested_negative
1,100,66,15,56,23.6,0.666,26,tested_negative
1,87,78,27,32,34.6,0.101,22,tested_negative
0,101,76,0,0,35.7,0.198,26,tested_negative
3,162,52,38,0,37.2,0.652,24,tested_positive
4,197,70,39,744,36.7,2.329,31,tested_negative
0,117,80,31,53,45.2,0.089,24,tested_negative
4,142,86,0,0,44,0.645,22,tested_positive
6,134,80,37,370,46.2,0.238,46,tested_positive
1,79,80,25,37,25.4,0.583,22,tested_negative
4,122,68,0,0,35,0.394,29,tested_negative
3,74,68,28,45,29.7,0.293,23,tested_negative
4,171,72,0,0,43.6,0.479,26,tested_positive
7,181,84,21,192,35.9,0.586,51,tested_positive
0,179,90,27,0,44.1,0.686,23,tested_positive
9,164,84,21,0,30.8,0.831,32,tested_positive
0,104,76,0,0,18.4,0.582,27,tested_negative
1,91,64,24,0,29.2,0.192,21,tested_negative
4,91,70,32,88,33.1,0.446,22,tested_negative
3,139,54,0,0,25.6,0.402,22,tested_positive
6,119,50,22,176,27.1,1.318,33,tested_positive
2,146,76,35,194,38.2,0.329,29,tested_negative
9,184,85,15,0,30,1.213,49,tested_positive
10,122,68,0,0,31.2,0.258,41,tested_negative
0,165,90,33,680,52.3,0.427,23,tested_negative
9,124,70,33,402,35.4,0.282,34,tested_negative
1,111,86,19,0,30.1,0.143,23,tested_negative
9,106,52,0,0,31.2,0.38,42,tested_negative
2,129,84,0,0,28,0.284,27,tested_negative
2,90,80,14,55,24.4,0.249,24,tested_negative
0,86,68,32,0,35.8,0.238,25,tested_negative
12,92,62,7,258,27.6,0.926,44,tested_positive
1,113,64,35,0,33.6,0.543,21,tested_positive
3,111,56,39,0,30.1,0.557,30,tested_negative
2,114,68,22,0,28.7,0.092,25,tested_negative
1,193,50,16,375,25.9,0.655,24,tested_negative
11,155,76,28,150,33.3,1.353,51,tested_positive
3,191,68,15,130,30.9,0.299,34,tested_negative
3,141,0,0,0,30,0.761,27,tested_positive
4,95,70,32,0,32.1,0.612,24,tested_negative
3,142,80,15,0,32.4,0.2,63,tested_negative
4,123,62,0,0,32,0.226,35,tested_positive
5,96,74,18,67,33.6,0.997,43,tested_negative
0,138,0,0,0,36.3,0.933,25,tested_positive
2,128,64,42,0,40,1.101,24,tested_negative
0,102,52,0,0,25.1,0.078,21,tested_negative
2,146,0,0,0,27.5,0.24,28,tested_positive
10,101,86,37,0,45.6,1.136,38,tested_positive
2,108,62,32,56,25.2,0.128,21,tested_negative
3,122,78,0,0,23,0.254,40,tested_negative
1,71,78,50,45,33.2,0.422,21,tested_negative
13,106,70,0,0,34.2,0.251,52,tested_negative
2,100,70,52,57,40.5,0.677,25,tested_negative
7,106,60,24,0,26.5,0.296,29,tested_positive
0,104,64,23,116,27.8,0.454,23,tested_negative
5,114,74,0,0,24.9,0.744,57,tested_negative
2,108,62,10,278,25.3,0.881,22,tested_negative
0,146,70,0,0,37.9,0.334,28,tested_positive
10,129,76,28,122,35.9,0.28,39,tested_negative
7,133,88,15,155,32.4,0.262,37,tested_negative
7,161,86,0,0,30.4,0.165,47,tested_positive
2,108,80,0,0,27,0.259,52,tested_positive
7,136,74,26,135,26,0.647,51,tested_negative
5,155,84,44,545,38.7,0.619,34,tested_negative
1,119,86,39,220,45.6,0.808,29,tested_positive
4,96,56,17,49,20.8,0.34,26,tested_negative
5,108,72,43,75,36.1,0.263,33,tested_negative
0,78,88,29,40,36.9,0.434,21,tested_negative
0,107,62,30,74,36.6,0.757,25,tested_positive
2,128,78,37,182,43.3,1.224,31,tested_positive
1,128,48,45,194,40.5,0.613,24,tested_positive
0,161,50,0,0,21.9,0.254,65,tested_negative
6,151,62,31,120,35.5,0.692,28,tested_negative
2,146,70,38,360,28,0.337,29,tested_positive
0,126,84,29,215,30.7,0.52,24,tested_negative
14,100,78,25,184,36.6,0.412,46,tested_positive
8,112,72,0,0,23.6,0.84,58,tested_negative
0,167,0,0,0,32.3,0.839,30,tested_positive
2,144,58,33,135,31.6,0.422,25,tested_positive
5,77,82,41,42,35.8,0.156,35,tested_negative
5,115,98,0,0,52.9,0.209,28,tested_positive
3,150,76,0,0,21,0.207,37,tested_negative
2,120,76,37,105,39.7,0.215,29,tested_negative
10,161,68,23,132,25.5,0.326,47,tested_positive
0,137,68,14,148,24.8,0.143,21,tested_negative
0,128,68,19,180,30.5,1.391,25,tested_positive
2,124,68,28,205,32.9,0.875,30,tested_positive
6,80,66,30,0,26.2,0.313,41,tested_negative
0,106,70,37,148,39.4,0.605,22,tested_negative
2,155,74,17,96,26.6,0.433,27,tested_positive
3,113,50,10,85,29.5,0.626,25,tested_negative
7,109,80,31,0,35.9,1.127,43,tested_positive
2,112,68,22,94,34.1,0.315,26,tested_negative
3,99,80,11,64,19.3,0.284,30,tested_negative
3,182,74,0,0,30.5,0.345,29,tested_positive
3,115,66,39,140,38.1,0.15,28,tested_negative
6,194,78,0,0,23.5,0.129,59,tested_positive
4,129,60,12,231,27.5,0.527,31,tested_negative
3,112,74,30,0,31.6,0.197,25,tested_positive
0,124,70,20,0,27.4,0.254,36,tested_positive
13,152,90,33,29,26.8,0.731,43,tested_positive
2,112,75,32,0,35.7,0.148,21,tested_negative
1,157,72,21,168,25.6,0.123,24,tested_negative
1,122,64,32,156,35.1,0.692,30,tested_positive
10,179,70,0,0,35.1,0.2,37,tested_negative
2,102,86,36,120,45.5,0.127,23,tested_positive
6,105,70,32,68,30.8,0.122,37,tested_negative
8,118,72,19,0,23.1,1.476,46,tested_negative
2,87,58,16,52,32.7,0.166,25,tested_negative
1,180,0,0,0,43.3,0.282,41,tested_positive
12,106,80,0,0,23.6,0.137,44,tested_negative
1,95,60,18,58,23.9,0.26,22,tested_negative
0,165,76,43,255,47.9,0.259,26,tested_negative
0,117,0,0,0,33.8,0.932,44,tested_negative
5,115,76,0,0,31.2,0.343,44,tested_positive
9,152,78,34,171,34.2,0.893,33,tested_positive
7,178,84,0,0,39.9,0.331,41,tested_positive
1,130,70,13,105,25.9,0.472,22,tested_negative
1,95,74,21,73,25.9,0.673,36,tested_negative
1,0,68,35,0,32,0.389,22,tested_negative
5,122,86,0,0,34.7,0.29,33,tested_negative
8,95,72,0,0,36.8,0.485,57,tested_negative
8,126,88,36,108,38.5,0.349,49,tested_negative
1,139,46,19,83,28.7,0.654,22,tested_negative
3,116,0,0,0,23.5,0.187,23,tested_negative
3,99,62,19,74,21.8,0.279,26,tested_negative
5,0,80,32,0,41,0.346,37,tested_positive
4,92,80,0,0,42.2,0.237,29,tested_negative
4,137,84,0,0,31.2,0.252,30,tested_negative
3,61,82,28,0,34.4,0.243,46,tested_negative
1,90,62,12,43,27.2,0.58,24,tested_negative
3,90,78,0,0,42.7,0.559,21,tested_negative
9,165,88,0,0,30.4,0.302,49,tested_positive
1,125,50,40,167,33.3,0.962,28,tested_positive
13,129,0,30,0,39.9,0.569,44,tested_positive
12,88,74,40,54,35.3,0.378,48,tested_negative
1,196,76,36,249,36.5,0.875,29,tested_positive
5,189,64,33,325,31.2,0.583,29,tested_positive
5,158,70,0,0,29.8,0.207,63,tested_negative
5,103,108,37,0,39.2,0.305,65,tested_negative
4,146,78,0,0,38.5,0.52,67,tested_positive
4,147,74,25,293,34.9,0.385,30,tested_negative
5,99,54,28,83,34,0.499,30,tested_negative
6,124,72,0,0,27.6,0.368,29,tested_positive
0,101,64,17,0,21,0.252,21,tested_negative
3,81,86,16,66,27.5,0.306,22,tested_negative
1,133,102,28,140,32.8,0.234,45,tested_positive
3,173,82,48,465,38.4,2.137,25,tested_positive
0,118,64,23,89,0,1.731,21,tested_negative
0,84,64,22,66,35.8,0.545,21,tested_negative
2,105,58,40,94,34.9,0.225,25,tested_negative
2,122,52,43,158,36.2,0.816,28,tested_negative
12,140,82,43,325,39.2,0.528,58,tested_positive
0,98,82,15,84,25.2,0.299,22,tested_negative
1,87,60,37,75,37.2,0.509,22,tested_negative
4,156,75,0,0,48.3,0.238,32,tested_positive
0,93,100,39,72,43.4,1.021,35,tested_negative
1,107,72,30,82,30.8,0.821,24,tested_negative
0,105,68,22,0,20,0.236,22,tested_negative
1,109,60,8,182,25.4,0.947,21,tested_negative
1,90,62,18,59,25.1,1.268,25,tested_negative
1,125,70,24,110,24.3,0.221,25,tested_negative
1,119,54,13,50,22.3,0.205,24,tested_negative
5,116,74,29,0,32.3,0.66,35,tested_positive
8,105,100,36,0,43.3,0.239,45,tested_positive
5,144,82,26,285,32,0.452,58,tested_positive
3,100,68,23,81,31.6,0.949,28,tested_negative
1,100,66,29,196,32,0.444,42,tested_negative
5,166,76,0,0,45.7,0.34,27,tested_positive
1,131,64,14,415,23.7,0.389,21,tested_negative
4,116,72,12,87,22.1,0.463,37,tested_negative
4,158,78,0,0,32.9,0.803,31,tested_positive
2,127,58,24,275,27.7,1.6,25,tested_negative
3,96,56,34,115,24.7,0.944,39,tested_negative
0,131,66,40,0,34.3,0.196,22,tested_positive
3,82,70,0,0,21.1,0.389,25,tested_negative
3,193,70,31,0,34.9,0.241,25,tested_positive
4,95,64,0,0,32,0.161,31,tested_positive
6,137,61,0,0,24.2,0.151,55,tested_negative
5,136,84,41,88,35,0.286,35,tested_positive
9,72,78,25,0,31.6,0.28,38,tested_negative
5,168,64,0,0,32.9,0.135,41,tested_positive
2,123,48,32,165,42.1,0.52,26,tested_negative
4,115,72,0,0,28.9,0.376,46,tested_positive
0,101,62,0,0,21.9,0.336,25,tested_negative
8,197,74,0,0,25.9,1.191,39,tested_positive
1,172,68,49,579,42.4,0.702,28,tested_positive
6,102,90,39,0,35.7,0.674,28,tested_negative
1,112,72,30,176,34.4,0.528,25,tested_negative
1,143,84,23,310,42.4,1.076,22,tested_negative
1,143,74,22,61,26.2,0.256,21,tested_negative
0,138,60,35,167,34.6,0.534,21,tested_positive
3,173,84,33,474,35.7,0.258,22,tested_positive
1,97,68,21,0,27.2,1.095,22,tested_negative
4,144,82,32,0,38.5,0.554,37,tested_positive
1,83,68,0,0,18.2,0.624,27,tested_negative
3,129,64,29,115,26.4,0.219,28,tested_positive
1,119,88,41,170,45.3,0.507,26,tested_negative
2,94,68,18,76,26,0.561,21,tested_negative
0,102,64,46,78,40.6,0.496,21,tested_negative
2,115,64,22,0,30.8,0.421,21,tested_negative
8,151,78,32,210,42.9,0.516,36,tested_positive
4,184,78,39,277,37,0.264,31,tested_positive
0,94,0,0,0,0,0.256,25,tested_negative
1,181,64,30,180,34.1,0.328,38,tested_positive
0,135,94,46,145,40.6,0.284,26,tested_negative
1,95,82,25,180,35,0.233,43,tested_positive
2,99,0,0,0,22.2,0.108,23,tested_negative
3,89,74,16,85,30.4,0.551,38,tested_negative
1,80,74,11,60,30,0.527,22,tested_negative
2,139,75,0,0,25.6,0.167,29,tested_negative
1,90,68,8,0,24.5,1.138,36,tested_negative
0,141,0,0,0,42.4,0.205,29,tested_positive
12,140,85,33,0,37.4,0.244,41,tested_negative
5,147,75,0,0,29.9,0.434,28,tested_negative
1,97,70,15,0,18.2,0.147,21,tested_negative
6,107,88,0,0,36.8,0.727,31,tested_negative
0,189,104,25,0,34.3,0.435,41,tested_positive
2,83,66,23,50,32.2,0.497,22,tested_negative
4,117,64,27,120,33.2,0.23,24,tested_negative
8,108,70,0,0,30.5,0.955,33,tested_positive
4,117,62,12,0,29.7,0.38,30,tested_positive
0,180,78,63,14,59.4,2.42,25,tested_positive
1,100,72,12,70,25.3,0.658,28,tested_negative
0,95,80,45,92,36.5,0.33,26,tested_negative
0,104,64,37,64,33.6,0.51,22,tested_positive
0,120,74,18,63,30.5,0.285,26,tested_negative
1,82,64,13,95,21.2,0.415,23,tested_negative
2,134,70,0,0,28.9,0.542,23,tested_positive
0,91,68,32,210,39.9,0.381,25,tested_negative
2,119,0,0,0,19.6,0.832,72,tested_negative
2,100,54,28,105,37.8,0.498,24,tested_negative
14,175,62,30,0,33.6,0.212,38,tested_positive
1,135,54,0,0,26.7,0.687,62,tested_negative
5,86,68,28,71,30.2,0.364,24,tested_negative
10,148,84,48,237,37.6,1.001,51,tested_positive
9,134,74,33,60,25.9,0.46,81,tested_negative
9,120,72,22,56,20.8,0.733,48,tested_negative
1,71,62,0,0,21.8,0.416,26,tested_negative
8,74,70,40,49,35.3,0.705,39,tested_negative
5,88,78,30,0,27.6,0.258,37,tested_negative
10,115,98,0,0,24,1.022,34,tested_negative
0,124,56,13,105,21.8,0.452,21,tested_negative
0,74,52,10,36,27.8,0.269,22,tested_negative
0,97,64,36,100,36.8,0.6,25,tested_negative
8,120,0,0,0,30,0.183,38,tested_positive
6,154,78,41,140,46.1,0.571,27,tested_negative
1,144,82,40,0,41.3,0.607,28,tested_negative
0,137,70,38,0,33.2,0.17,22,tested_negative
0,119,66,27,0,38.8,0.259,22,tested_negative
7,136,90,0,0,29.9,0.21,50,tested_negative
4,114,64,0,0,28.9,0.126,24,tested_negative
0,137,84,27,0,27.3,0.231,59,tested_negative
2,105,80,45,191,33.7,0.711,29,tested_positive
7,114,76,17,110,23.8,0.466,31,tested_negative
8,126,74,38,75,25.9,0.162,39,tested_negative
4,132,86,31,0,28,0.419,63,tested_negative
3,158,70,30,328,35.5,0.344,35,tested_positive
0,123,88,37,0,35.2,0.197,29,tested_negative
4,85,58,22,49,27.8,0.306,28,tested_negative
0,84,82,31,125,38.2,0.233,23,tested_negative
0,145,0,0,0,44.2,0.63,31,tested_positive
0,135,68,42,250,42.3,0.365,24,tested_positive
1,139,62,41,480,40.7,0.536,21,tested_negative
0,173,78,32,265,46.5,1.159,58,tested_negative
4,99,72,17,0,25.6,0.294,28,tested_negative
8,194,80,0,0,26.1,0.551,67,tested_negative
2,83,65,28,66,36.8,0.629,24,tested_negative
2,89,90,30,0,33.5,0.292,42,tested_negative
4,99,68,38,0,32.8,0.145,33,tested_negative
4,125,70,18,122,28.9,1.144,45,tested_positive
3,80,0,0,0,0,0.174,22,tested_negative
6,166,74,0,0,26.6,0.304,66,tested_negative
5,110,68,0,0,26,0.292,30,tested_negative
2,81,72,15,76,30.1,0.547,25,tested_negative
7,195,70,33,145,25.1,0.163,55,tested_positive
6,154,74,32,193,29.3,0.839,39,tested_negative
2,117,90,19,71,25.2,0.313,21,tested_negative
3,84,72,32,0,37.2,0.267,28,tested_negative
6,0,68,41,0,39,0.727,41,tested_positive
7,94,64,25,79,33.3,0.738,41,tested_negative
3,96,78,39,0,37.3,0.238,40,tested_negative
10,75,82,0,0,33.3,0.263,38,tested_negative
0,180,90,26,90,36.5,0.314,35,tested_positive
1,130,60,23,170,28.6,0.692,21,tested_negative
2,84,50,23,76,30.4,0.968,21,tested_negative
8,120,78,0,0,25,0.409,64,tested_negative
12,84,72,31,0,29.7,0.297,46,tested_positive
0,139,62,17,210,22.1,0.207,21,tested_negative
9,91,68,0,0,24.2,0.2,58,tested_negative
2,91,62,0,0,27.3,0.525,22,tested_negative
3,99,54,19,86,25.6,0.154,24,tested_negative
3,163,70,18,105,31.6,0.268,28,tested_positive
9,145,88,34,165,30.3,0.771,53,tested_positive
7,125,86,0,0,37.6,0.304,51,tested_negative
13,76,60,0,0,32.8,0.18,41,tested_negative
6,129,90,7,326,19.6,0.582,60,tested_negative
2,68,70,32,66,25,0.187,25,tested_negative
3,124,80,33,130,33.2,0.305,26,tested_negative
6,114,0,0,0,0,0.189,26,tested_negative
9,130,70,0,0,34.2,0.652,45,tested_positive
3,125,58,0,0,31.6,0.151,24,tested_negative
3,87,60,18,0,21.8,0.444,21,tested_negative
1,97,64,19,82,18.2,0.299,21,tested_negative
3,116,74,15,105,26.3,0.107,24,tested_negative
0,117,66,31,188,30.8,0.493,22,tested_negative
0,111,65,0,0,24.6,0.66,31,tested_negative
2,122,60,18,106,29.8,0.717,22,tested_negative
0,107,76,0,0,45.3,0.686,24,tested_negative
1,86,66,52,65,41.3,0.917,29,tested_negative
6,91,0,0,0,29.8,0.501,31,tested_negative
1,77,56,30,56,33.3,1.251,24,tested_negative
4,132,0,0,0,32.9,0.302,23,tested_positive
0,105,90,0,0,29.6,0.197,46,tested_negative
0,57,60,0,0,21.7,0.735,67,tested_negative
0,127,80,37,210,36.3,0.804,23,tested_negative
3,129,92,49,155,36.4,0.968,32,tested_positive
8,100,74,40,215,39.4,0.661,43,tested_positive
3,128,72,25,190,32.4,0.549,27,tested_positive
10,90,85,32,0,34.9,0.825,56,tested_positive
4,84,90,23,56,39.5,0.159,25,tested_negative
1,88,78,29,76,32,0.365,29,tested_negative
8,186,90,35,225,34.5,0.423,37,tested_positive
5,187,76,27,207,43.6,1.034,53,tested_positive
4,131,68,21,166,33.1,0.16,28,tested_negative
1,164,82,43,67,32.8,0.341,50,tested_negative
4,189,110,31,0,28.5,0.68,37,tested_negative
1,116,70,28,0,27.4,0.204,21,tested_negative
3,84,68,30,106,31.9,0.591,25,tested_negative
6,114,88,0,0,27.8,0.247,66,tested_negative
1,88,62,24,44,29.9,0.422,23,tested_negative
1,84,64,23,115,36.9,0.471,28,tested_negative
7,124,70,33,215,25.5,0.161,37,tested_negative
1,97,70,40,0,38.1,0.218,30,tested_negative
8,110,76,0,0,27.8,0.237,58,tested_negative
11,103,68,40,0,46.2,0.126,42,tested_negative
11,85,74,0,0,30.1,0.3,35,tested_negative
6,125,76,0,0,33.8,0.121,54,tested_positive
0,198,66,32,274,41.3,0.502,28,tested_positive
1,87,68,34,77,37.6,0.401,24,tested_negative
6,99,60,19,54,26.9,0.497,32,tested_negative
0,91,80,0,0,32.4,0.601,27,tested_negative
2,95,54,14,88,26.1,0.748,22,tested_negative
1,99,72,30,18,38.6,0.412,21,tested_negative
6,92,62,32,126,32,0.085,46,tested_negative
4,154,72,29,126,31.3,0.338,37,tested_negative
0,121,66,30,165,34.3,0.203,33,tested_positive
3,78,70,0,0,32.5,0.27,39,tested_negative
2,130,96,0,0,22.6,0.268,21,tested_negative
3,111,58,31,44,29.5,0.43,22,tested_negative
2,98,60,17,120,34.7,0.198,22,tested_negative
1,143,86,30,330,30.1,0.892,23,tested_negative
1,119,44,47,63,35.5,0.28,25,tested_negative
6,108,44,20,130,24,0.813,35,tested_negative
2,118,80,0,0,42.9,0.693,21,tested_positive
10,133,68,0,0,27,0.245,36,tested_negative
2,197,70,99,0,34.7,0.575,62,tested_positive
0,151,90,46,0,42.1,0.371,21,tested_positive
6,109,60,27,0,25,0.206,27,tested_negative
12,121,78,17,0,26.5,0.259,62,tested_negative
8,100,76,0,0,38.7,0.19,42,tested_negative
8,124,76,24,600,28.7,0.687,52,tested_positive
1,93,56,11,0,22.5,0.417,22,tested_negative
8,143,66,0,0,34.9,0.129,41,tested_positive
6,103,66,0,0,24.3,0.249,29,tested_negative
3,176,86,27,156,33.3,1.154,52,tested_positive
0,73,0,0,0,21.1,0.342,25,tested_negative
11,111,84,40,0,46.8,0.925,45,tested_positive
2,112,78,50,140,39.4,0.175,24,tested_negative
3,132,80,0,0,34.4,0.402,44,tested_positive
2,82,52,22,115,28.5,1.699,25,tested_negative
6,123,72,45,230,33.6,0.733,34,tested_negative
0,188,82,14,185,32,0.682,22,tested_positive
0,67,76,0,0,45.3,0.194,46,tested_negative
1,89,24,19,25,27.8,0.559,21,tested_negative
1,173,74,0,0,36.8,0.088,38,tested_positive
1,109,38,18,120,23.1,0.407,26,tested_negative
1,108,88,19,0,27.1,0.4,24,tested_negative
6,96,0,0,0,23.7,0.19,28,tested_negative
1,124,74,36,0,27.8,0.1,30,tested_negative
7,150,78,29,126,35.2,0.692,54,tested_positive
4,183,0,0,0,28.4,0.212,36,tested_positive
1,124,60,32,0,35.8,0.514,21,tested_negative
1,181,78,42,293,40,1.258,22,tested_positive
1,92,62,25,41,19.5,0.482,25,tested_negative
0,152,82,39,272,41.5,0.27,27,tested_negative
1,111,62,13,182,24,0.138,23,tested_negative
3,106,54,21,158,30.9,0.292,24,tested_negative
3,174,58,22,194,32.9,0.593,36,tested_positive
7,168,88,42,321,38.2,0.787,40,tested_positive
6,105,80,28,0,32.5,0.878,26,tested_negative
11,138,74,26,144,36.1,0.557,50,tested_positive
3,106,72,0,0,25.8,0.207,27,tested_negative
6,117,96,0,0,28.7,0.157,30,tested_negative
2,68,62,13,15,20.1,0.257,23,tested_negative
9,112,82,24,0,28.2,1.282,50,tested_positive
0,119,0,0,0,32.4,0.141,24,tested_positive
2,112,86,42,160,38.4,0.246,28,tested_negative
2,92,76,20,0,24.2,1.698,28,tested_negative
6,183,94,0,0,40.8,1.461,45,tested_negative
0,94,70,27,115,43.5,0.347,21,tested_negative
2,108,64,0,0,30.8,0.158,21,tested_negative
4,90,88,47,54,37.7,0.362,29,tested_negative
0,125,68,0,0,24.7,0.206,21,tested_negative
0,132,78,0,0,32.4,0.393,21,tested_negative
5,128,80,0,0,34.6,0.144,45,tested_negative
4,94,65,22,0,24.7,0.148,21,tested_negative
7,114,64,0,0,27.4,0.732,34,tested_positive
0,102,78,40,90,34.5,0.238,24,tested_negative
2,111,60,0,0,26.2,0.343,23,tested_negative
1,128,82,17,183,27.5,0.115,22,tested_negative
10,92,62,0,0,25.9,0.167,31,tested_negative
13,104,72,0,0,31.2,0.465,38,tested_positive
5,104,74,0,0,28.8,0.153,48,tested_negative
2,94,76,18,66,31.6,0.649,23,tested_negative
7,97,76,32,91,40.9,0.871,32,tested_positive
1,100,74,12,46,19.5,0.149,28,tested_negative
0,102,86,17,105,29.3,0.695,27,tested_negative
4,128,70,0,0,34.3,0.303,24,tested_negative
6,147,80,0,0,29.5,0.178,50,tested_positive
4,90,0,0,0,28,0.61,31,tested_negative
3,103,72,30,152,27.6,0.73,27,tested_negative
2,157,74,35,440,39.4,0.134,30,tested_negative
1,167,74,17,144,23.4,0.447,33,tested_positive
0,179,50,36,159,37.8,0.455,22,tested_positive
11,136,84,35,130,28.3,0.26,42,tested_positive
0,107,60,25,0,26.4,0.133,23,tested_negative
1,91,54,25,100,25.2,0.234,23,tested_negative
1,117,60,23,106,33.8,0.466,27,tested_negative
5,123,74,40,77,34.1,0.269,28,tested_negative
2,120,54,0,0,26.8,0.455,27,tested_negative
1,106,70,28,135,34.2,0.142,22,tested_negative
2,155,52,27,540,38.7,0.24,25,tested_positive
2,101,58,35,90,21.8,0.155,22,tested_negative
1,120,80,48,200,38.9,1.162,41,tested_negative
11,127,106,0,0,39,0.19,51,tested_negative
3,80,82,31,70,34.2,1.292,27,tested_positive
10,162,84,0,0,27.7,0.182,54,tested_negative
1,199,76,43,0,42.9,1.394,22,tested_positive
8,167,106,46,231,37.6,0.165,43,tested_positive
9,145,80,46,130,37.9,0.637,40,tested_positive
6,115,60,39,0,33.7,0.245,40,tested_positive
1,112,80,45,132,34.8,0.217,24,tested_negative
4,145,82,18,0,32.5,0.235,70,tested_positive
10,111,70,27,0,27.5,0.141,40,tested_positive
6,98,58,33,190,34,0.43,43,tested_negative
9,154,78,30,100,30.9,0.164,45,tested_negative
6,165,68,26,168,33.6,0.631,49,tested_negative
1,99,58,10,0,25.4,0.551,21,tested_negative
10,68,106,23,49,35.5,0.285,47,tested_negative
3,123,100,35,240,57.3,0.88,22,tested_negative
8,91,82,0,0,35.6,0.587,68,tested_negative
6,195,70,0,0,30.9,0.328,31,tested_positive
9,156,86,0,0,24.8,0.23,53,tested_positive
0,93,60,0,0,35.3,0.263,25,tested_negative
3,121,52,0,0,36,0.127,25,tested_positive
2,101,58,17,265,24.2,0.614,23,tested_negative
2,56,56,28,45,24.2,0.332,22,tested_negative
0,162,76,36,0,49.6,0.364,26,tested_positive
0,95,64,39,105,44.6,0.366,22,tested_negative
4,125,80,0,0,32.3,0.536,27,tested_positive
5,136,82,0,0,0,0.64,69,tested_negative
2,129,74,26,205,33.2,0.591,25,tested_negative
3,130,64,0,0,23.1,0.314,22,tested_negative
1,107,50,19,0,28.3,0.181,29,tested_negative
1,140,74,26,180,24.1,0.828,23,tested_negative
1,144,82,46,180,46.1,0.335,46,tested_positive
8,107,80,0,0,24.6,0.856,34,tested_negative
13,158,114,0,0,42.3,0.257,44,tested_positive
2,121,70,32,95,39.1,0.886,23,tested_negative
7,129,68,49,125,38.5,0.439,43,tested_positive
2,90,60,0,0,23.5,0.191,25,tested_negative
7,142,90,24,480,30.4,0.128,43,tested_positive
3,169,74,19,125,29.9,0.268,31,tested_positive
0,99,0,0,0,25,0.253,22,tested_negative
4,127,88,11,155,34.5,0.598,28,tested_negative
4,118,70,0,0,44.5,0.904,26,tested_negative
2,122,76,27,200,35.9,0.483,26,tested_negative
6,125,78,31,0,27.6,0.565,49,tested_positive
1,168,88,29,0,35,0.905,52,tested_positive
2,129,0,0,0,38.5,0.304,41,tested_negative
4,110,76,20,100,28.4,0.118,27,tested_negative
6,80,80,36,0,39.8,0.177,28,tested_negative
10,115,0,0,0,0,0.261,30,tested_positive
2,127,46,21,335,34.4,0.176,22,tested_negative
9,164,78,0,0,32.8,0.148,45,tested_positive
2,93,64,32,160,38,0.674,23,tested_positive
3,158,64,13,387,31.2,0.295,24,tested_negative
5,126,78,27,22,29.6,0.439,40,tested_negative
10,129,62,36,0,41.2,0.441,38,tested_positive
0,134,58,20,291,26.4,0.352,21,tested_negative
3,102,74,0,0,29.5,0.121,32,tested_negative
7,187,50,33,392,33.9,0.826,34,tested_positive
3,173,78,39,185,33.8,0.97,31,tested_positive
10,94,72,18,0,23.1,0.595,56,tested_negative
1,108,60,46,178,35.5,0.415,24,tested_negative
5,97,76,27,0,35.6,0.378,52,tested_positive
4,83,86,19,0,29.3,0.317,34,tested_negative
1,114,66,36,200,38.1,0.289,21,tested_negative
1,149,68,29,127,29.3,0.349,42,tested_positive
5,117,86,30,105,39.1,0.251,42,tested_negative
1,111,94,0,0,32.8,0.265,45,tested_negative
4,112,78,40,0,39.4,0.236,38,tested_negative
1,116,78,29,180,36.1,0.496,25,tested_negative
0,141,84,26,0,32.4,0.433,22,tested_negative
2,175,88,0,0,22.9,0.326,22,tested_negative
2,92,52,0,0,30.1,0.141,22,tested_negative
3,130,78,23,79,28.4,0.323,34,tested_positive
8,120,86,0,0,28.4,0.259,22,tested_positive
2,174,88,37,120,44.5,0.646,24,tested_positive
2,106,56,27,165,29,0.426,22,tested_negative
2,105,75,0,0,23.3,0.56,53,tested_negative
4,95,60,32,0,35.4,0.284,28,tested_negative
0,126,86,27,120,27.4,0.515,21,tested_negative
8,65,72,23,0,32,0.6,42,tested_negative
2,99,60,17,160,36.6,0.453,21,tested_negative
1,102,74,0,0,39.5,0.293,42,tested_positive
11,120,80,37,150,42.3,0.785,48,tested_positive
3,102,44,20,94,30.8,0.4,26,tested_negative
1,109,58,18,116,28.5,0.219,22,tested_negative
9,140,94,0,0,32.7,0.734,45,tested_positive
13,153,88,37,140,40.6,1.174,39,tested_negative
12,100,84,33,105,30,0.488,46,tested_negative
1,147,94,41,0,49.3,0.358,27,tested_positive
1,81,74,41,57,46.3,1.096,32,tested_negative
3,187,70,22,200,36.4,0.408,36,tested_positive
6,162,62,0,0,24.3,0.178,50,tested_positive
4,136,70,0,0,31.2,1.182,22,tested_positive
1,121,78,39,74,39,0.261,28,tested_negative
3,108,62,24,0,26,0.223,25,tested_negative
0,181,88,44,510,43.3,0.222,26,tested_positive
8,154,78,32,0,32.4,0.443,45,tested_positive
1,128,88,39,110,36.5,1.057,37,tested_positive
7,137,90,41,0,32,0.391,39,tested_negative
0,123,72,0,0,36.3,0.258,52,tested_positive
1,106,76,0,0,37.5,0.197,26,tested_negative
6,190,92,0,0,35.5,0.278,66,tested_positive
2,88,58,26,16,28.4,0.766,22,tested_negative
9,170,74,31,0,44,0.403,43,tested_positive
9,89,62,0,0,22.5,0.142,33,tested_negative
10,101,76,48,180,32.9,0.171,63,tested_negative
2,122,70,27,0,36.8,0.34,27,tested_negative
5,121,72,23,112,26.2,0.245,30,tested_negative
1,126,60,0,0,30.1,0.349,47,tested_positive
1,93,70,31,0,30.4,0.315,23,tested_negative

428
tests/data/ecoli.arff Executable file
View File

@@ -0,0 +1,428 @@
%
% 1. Title: Protein Localization Sites
%
%
% 2. Creator and Maintainer:
% Kenta Nakai
% Institue of Molecular and Cellular Biology
% Osaka, University
% 1-3 Yamada-oka, Suita 565 Japan
% nakai@imcb.osaka-u.ac.jp
% http://www.imcb.osaka-u.ac.jp/nakai/psort.html
% Donor: Paul Horton (paulh@cs.berkeley.edu)
% Date: September, 1996
% See also: yeast database
%
% 3. Past Usage.
% Reference: "A Probablistic Classification System for Predicting the Cellular
% Localization Sites of Proteins", Paul Horton & Kenta Nakai,
% Intelligent Systems in Molecular Biology, 109-115.
% St. Louis, USA 1996.
% Results: 81% for E.coli with an ad hoc structured
% probability model. Also similar accuracy for Binary Decision Tree and
% Bayesian Classifier methods applied by the same authors in
% unpublished results.
%
% Predicted Attribute: Localization site of protein. ( non-numeric ).
%
%
% 4. The references below describe a predecessor to this dataset and its
% development. They also give results (not cross-validated) for classification
% by a rule-based expert system with that version of the dataset.
%
% Reference: "Expert Sytem for Predicting Protein Localization Sites in
% Gram-Negative Bacteria", Kenta Nakai & Minoru Kanehisa,
% PROTEINS: Structure, Function, and Genetics 11:95-110, 1991.
%
% Reference: "A Knowledge Base for Predicting Protein Localization Sites in
% Eukaryotic Cells", Kenta Nakai & Minoru Kanehisa,
% Genomics 14:897-911, 1992.
%
%
% 5. Number of Instances: 336 for the E.coli dataset and
%
%
% 6. Number of Attributes.
% for E.coli dataset: 8 ( 7 predictive, 1 name )
%
% 7. Attribute Information.
%
% 1. Sequence Name: Accession number for the SWISS-PROT database
% 2. mcg: McGeoch's method for signal sequence recognition.
% 3. gvh: von Heijne's method for signal sequence recognition.
% 4. lip: von Heijne's Signal Peptidase II consensus sequence score.
% Binary attribute.
% 5. chg: Presence of charge on N-terminus of predicted lipoproteins.
% Binary attribute.
% 6. aac: score of discriminant analysis of the amino acid content of
% outer membrane and periplasmic proteins.
% 7. alm1: score of the ALOM membrane spanning region prediction program.
% 8. alm2: score of ALOM program after excluding putative cleavable signal
% regions from the sequence.
%
% NOTE - the sequence name has been removed
%
% 8. Missing Attribute Values: None.
%
%
% 9. Class Distribution. The class is the localization site. Please see Nakai &
% Kanehisa referenced above for more details.
%
% cp (cytoplasm) 143
% im (inner membrane without signal sequence) 77
% pp (perisplasm) 52
% imU (inner membrane, uncleavable signal sequence) 35
% om (outer membrane) 20
% omL (outer membrane lipoprotein) 5
% imL (inner membrane lipoprotein) 2
% imS (inner membrane, cleavable signal sequence) 2
@relation ecoli
@attribute mcg numeric
@attribute gvh numeric
@attribute lip numeric
@attribute chg numeric
@attribute aac numeric
@attribute alm1 numeric
@attribute alm2 numeric
@attribute class {cp,im,pp,imU,om,omL,imL,imS}
@data
0.49,0.29,0.48,0.5,0.56,0.24,0.35,cp
0.07,0.4,0.48,0.5,0.54,0.35,0.44,cp
0.56,0.4,0.48,0.5,0.49,0.37,0.46,cp
0.59,0.49,0.48,0.5,0.52,0.45,0.36,cp
0.23,0.32,0.48,0.5,0.55,0.25,0.35,cp
0.67,0.39,0.48,0.5,0.36,0.38,0.46,cp
0.29,0.28,0.48,0.5,0.44,0.23,0.34,cp
0.21,0.34,0.48,0.5,0.51,0.28,0.39,cp
0.2,0.44,0.48,0.5,0.46,0.51,0.57,cp
0.42,0.4,0.48,0.5,0.56,0.18,0.3,cp
0.42,0.24,0.48,0.5,0.57,0.27,0.37,cp
0.25,0.48,0.48,0.5,0.44,0.17,0.29,cp
0.39,0.32,0.48,0.5,0.46,0.24,0.35,cp
0.51,0.5,0.48,0.5,0.46,0.32,0.35,cp
0.22,0.43,0.48,0.5,0.48,0.16,0.28,cp
0.25,0.4,0.48,0.5,0.46,0.44,0.52,cp
0.34,0.45,0.48,0.5,0.38,0.24,0.35,cp
0.44,0.27,0.48,0.5,0.55,0.52,0.58,cp
0.23,0.4,0.48,0.5,0.39,0.28,0.38,cp
0.41,0.57,0.48,0.5,0.39,0.21,0.32,cp
0.4,0.45,0.48,0.5,0.38,0.22,0,cp
0.31,0.23,0.48,0.5,0.73,0.05,0.14,cp
0.51,0.54,0.48,0.5,0.41,0.34,0.43,cp
0.3,0.16,0.48,0.5,0.56,0.11,0.23,cp
0.36,0.39,0.48,0.5,0.48,0.22,0.23,cp
0.29,0.37,0.48,0.5,0.48,0.44,0.52,cp
0.25,0.4,0.48,0.5,0.47,0.33,0.42,cp
0.21,0.51,0.48,0.5,0.5,0.32,0.41,cp
0.43,0.37,0.48,0.5,0.53,0.35,0.44,cp
0.43,0.39,0.48,0.5,0.47,0.31,0.41,cp
0.53,0.38,0.48,0.5,0.44,0.26,0.36,cp
0.34,0.33,0.48,0.5,0.38,0.35,0.44,cp
0.56,0.51,0.48,0.5,0.34,0.37,0.46,cp
0.4,0.29,0.48,0.5,0.42,0.35,0.44,cp
0.24,0.35,0.48,0.5,0.31,0.19,0.31,cp
0.36,0.54,0.48,0.5,0.41,0.38,0.46,cp
0.29,0.52,0.48,0.5,0.42,0.29,0.39,cp
0.65,0.47,0.48,0.5,0.59,0.3,0.4,cp
0.32,0.42,0.48,0.5,0.35,0.28,0.38,cp
0.38,0.46,0.48,0.5,0.48,0.22,0.29,cp
0.33,0.45,0.48,0.5,0.52,0.32,0.41,cp
0.3,0.37,0.48,0.5,0.59,0.41,0.49,cp
0.4,0.5,0.48,0.5,0.45,0.39,0.47,cp
0.28,0.38,0.48,0.5,0.5,0.33,0.42,cp
0.61,0.45,0.48,0.5,0.48,0.35,0.41,cp
0.17,0.38,0.48,0.5,0.45,0.42,0.5,cp
0.44,0.35,0.48,0.5,0.55,0.55,0.61,cp
0.43,0.4,0.48,0.5,0.39,0.28,0.39,cp
0.42,0.35,0.48,0.5,0.58,0.15,0.27,cp
0.23,0.33,0.48,0.5,0.43,0.33,0.43,cp
0.37,0.52,0.48,0.5,0.42,0.42,0.36,cp
0.29,0.3,0.48,0.5,0.45,0.03,0.17,cp
0.22,0.36,0.48,0.5,0.35,0.39,0.47,cp
0.23,0.58,0.48,0.5,0.37,0.53,0.59,cp
0.47,0.47,0.48,0.5,0.22,0.16,0.26,cp
0.54,0.47,0.48,0.5,0.28,0.33,0.42,cp
0.51,0.37,0.48,0.5,0.35,0.36,0.45,cp
0.4,0.35,0.48,0.5,0.45,0.33,0.42,cp
0.44,0.34,0.48,0.5,0.3,0.33,0.43,cp
0.42,0.38,0.48,0.5,0.54,0.34,0.43,cp
0.44,0.56,0.48,0.5,0.5,0.46,0.54,cp
0.52,0.36,0.48,0.5,0.41,0.28,0.38,cp
0.36,0.41,0.48,0.5,0.48,0.47,0.54,cp
0.18,0.3,0.48,0.5,0.46,0.24,0.35,cp
0.47,0.29,0.48,0.5,0.51,0.33,0.43,cp
0.24,0.43,0.48,0.5,0.54,0.52,0.59,cp
0.25,0.37,0.48,0.5,0.41,0.33,0.42,cp
0.52,0.57,0.48,0.5,0.42,0.47,0.54,cp
0.25,0.37,0.48,0.5,0.43,0.26,0.36,cp
0.35,0.48,0.48,0.5,0.56,0.4,0.48,cp
0.26,0.26,0.48,0.5,0.34,0.25,0.35,cp
0.44,0.51,0.48,0.5,0.47,0.26,0.36,cp
0.37,0.5,0.48,0.5,0.42,0.36,0.45,cp
0.44,0.42,0.48,0.5,0.42,0.25,0.2,cp
0.24,0.43,0.48,0.5,0.37,0.28,0.38,cp
0.42,0.3,0.48,0.5,0.48,0.26,0.36,cp
0.48,0.42,0.48,0.5,0.45,0.25,0.35,cp
0.41,0.48,0.48,0.5,0.51,0.44,0.51,cp
0.44,0.28,0.48,0.5,0.43,0.27,0.37,cp
0.29,0.41,0.48,0.5,0.48,0.38,0.46,cp
0.34,0.28,0.48,0.5,0.41,0.35,0.44,cp
0.41,0.43,0.48,0.5,0.45,0.31,0.41,cp
0.29,0.47,0.48,0.5,0.41,0.23,0.34,cp
0.34,0.55,0.48,0.5,0.58,0.31,0.41,cp
0.36,0.56,0.48,0.5,0.43,0.45,0.53,cp
0.4,0.46,0.48,0.5,0.52,0.49,0.56,cp
0.5,0.49,0.48,0.5,0.49,0.46,0.53,cp
0.52,0.44,0.48,0.5,0.37,0.36,0.42,cp
0.5,0.51,0.48,0.5,0.27,0.23,0.34,cp
0.53,0.42,0.48,0.5,0.16,0.29,0.39,cp
0.34,0.46,0.48,0.5,0.52,0.35,0.44,cp
0.4,0.42,0.48,0.5,0.37,0.27,0.27,cp
0.41,0.43,0.48,0.5,0.5,0.24,0.25,cp
0.3,0.45,0.48,0.5,0.36,0.21,0.32,cp
0.31,0.47,0.48,0.5,0.29,0.28,0.39,cp
0.64,0.76,0.48,0.5,0.45,0.35,0.38,cp
0.35,0.37,0.48,0.5,0.3,0.34,0.43,cp
0.57,0.54,0.48,0.5,0.37,0.28,0.33,cp
0.65,0.55,0.48,0.5,0.34,0.37,0.28,cp
0.51,0.46,0.48,0.5,0.58,0.31,0.41,cp
0.38,0.4,0.48,0.5,0.63,0.25,0.35,cp
0.24,0.57,0.48,0.5,0.63,0.34,0.43,cp
0.38,0.26,0.48,0.5,0.54,0.16,0.28,cp
0.33,0.47,0.48,0.5,0.53,0.18,0.29,cp
0.24,0.34,0.48,0.5,0.38,0.3,0.4,cp
0.26,0.5,0.48,0.5,0.44,0.32,0.41,cp
0.44,0.49,0.48,0.5,0.39,0.38,0.4,cp
0.43,0.32,0.48,0.5,0.33,0.45,0.52,cp
0.49,0.43,0.48,0.5,0.49,0.3,0.4,cp
0.47,0.28,0.48,0.5,0.56,0.2,0.25,cp
0.32,0.33,0.48,0.5,0.6,0.06,0.2,cp
0.34,0.35,0.48,0.5,0.51,0.49,0.56,cp
0.35,0.34,0.48,0.5,0.46,0.3,0.27,cp
0.38,0.3,0.48,0.5,0.43,0.29,0.39,cp
0.38,0.44,0.48,0.5,0.43,0.2,0.31,cp
0.41,0.51,0.48,0.5,0.58,0.2,0.31,cp
0.34,0.42,0.48,0.5,0.41,0.34,0.43,cp
0.51,0.49,0.48,0.5,0.53,0.14,0.26,cp
0.25,0.51,0.48,0.5,0.37,0.42,0.5,cp
0.29,0.28,0.48,0.5,0.5,0.42,0.5,cp
0.25,0.26,0.48,0.5,0.39,0.32,0.42,cp
0.24,0.41,0.48,0.5,0.49,0.23,0.34,cp
0.17,0.39,0.48,0.5,0.53,0.3,0.39,cp
0.04,0.31,0.48,0.5,0.41,0.29,0.39,cp
0.61,0.36,0.48,0.5,0.49,0.35,0.44,cp
0.34,0.51,0.48,0.5,0.44,0.37,0.46,cp
0.28,0.33,0.48,0.5,0.45,0.22,0.33,cp
0.4,0.46,0.48,0.5,0.42,0.35,0.44,cp
0.23,0.34,0.48,0.5,0.43,0.26,0.37,cp
0.37,0.44,0.48,0.5,0.42,0.39,0.47,cp
0,0.38,0.48,0.5,0.42,0.48,0.55,cp
0.39,0.31,0.48,0.5,0.38,0.34,0.43,cp
0.3,0.44,0.48,0.5,0.49,0.22,0.33,cp
0.27,0.3,0.48,0.5,0.71,0.28,0.39,cp
0.17,0.52,0.48,0.5,0.49,0.37,0.46,cp
0.36,0.42,0.48,0.5,0.53,0.32,0.41,cp
0.3,0.37,0.48,0.5,0.43,0.18,0.3,cp
0.26,0.4,0.48,0.5,0.36,0.26,0.37,cp
0.4,0.41,0.48,0.5,0.55,0.22,0.33,cp
0.22,0.34,0.48,0.5,0.42,0.29,0.39,cp
0.44,0.35,0.48,0.5,0.44,0.52,0.59,cp
0.27,0.42,0.48,0.5,0.37,0.38,0.43,cp
0.16,0.43,0.48,0.5,0.54,0.27,0.37,cp
0.06,0.61,0.48,0.5,0.49,0.92,0.37,im
0.44,0.52,0.48,0.5,0.43,0.47,0.54,im
0.63,0.47,0.48,0.5,0.51,0.82,0.84,im
0.23,0.48,0.48,0.5,0.59,0.88,0.89,im
0.34,0.49,0.48,0.5,0.58,0.85,0.8,im
0.43,0.4,0.48,0.5,0.58,0.75,0.78,im
0.46,0.61,0.48,0.5,0.48,0.86,0.87,im
0.27,0.35,0.48,0.5,0.51,0.77,0.79,im
0.52,0.39,0.48,0.5,0.65,0.71,0.73,im
0.29,0.47,0.48,0.5,0.71,0.65,0.69,im
0.55,0.47,0.48,0.5,0.57,0.78,0.8,im
0.12,0.67,0.48,0.5,0.74,0.58,0.63,im
0.4,0.5,0.48,0.5,0.65,0.82,0.84,im
0.73,0.36,0.48,0.5,0.53,0.91,0.92,im
0.84,0.44,0.48,0.5,0.48,0.71,0.74,im
0.48,0.45,0.48,0.5,0.6,0.78,0.8,im
0.54,0.49,0.48,0.5,0.4,0.87,0.88,im
0.48,0.41,0.48,0.5,0.51,0.9,0.88,im
0.5,0.66,0.48,0.5,0.31,0.92,0.92,im
0.72,0.46,0.48,0.5,0.51,0.66,0.7,im
0.47,0.55,0.48,0.5,0.58,0.71,0.75,im
0.33,0.56,0.48,0.5,0.33,0.78,0.8,im
0.64,0.58,0.48,0.5,0.48,0.78,0.73,im
0.54,0.57,0.48,0.5,0.56,0.81,0.83,im
0.47,0.59,0.48,0.5,0.52,0.76,0.79,im
0.63,0.5,0.48,0.5,0.59,0.85,0.86,im
0.49,0.42,0.48,0.5,0.53,0.79,0.81,im
0.31,0.5,0.48,0.5,0.57,0.84,0.85,im
0.74,0.44,0.48,0.5,0.55,0.88,0.89,im
0.33,0.45,0.48,0.5,0.45,0.88,0.89,im
0.45,0.4,0.48,0.5,0.61,0.74,0.77,im
0.71,0.4,0.48,0.5,0.71,0.7,0.74,im
0.5,0.37,0.48,0.5,0.66,0.64,0.69,im
0.66,0.53,0.48,0.5,0.59,0.66,0.66,im
0.6,0.61,0.48,0.5,0.54,0.67,0.71,im
0.83,0.37,0.48,0.5,0.61,0.71,0.74,im
0.34,0.51,0.48,0.5,0.67,0.9,0.9,im
0.63,0.54,0.48,0.5,0.65,0.79,0.81,im
0.7,0.4,0.48,0.5,0.56,0.86,0.83,im
0.6,0.5,1,0.5,0.54,0.77,0.8,im
0.16,0.51,0.48,0.5,0.33,0.39,0.48,im
0.74,0.7,0.48,0.5,0.66,0.65,0.69,im
0.2,0.46,0.48,0.5,0.57,0.78,0.81,im
0.89,0.55,0.48,0.5,0.51,0.72,0.76,im
0.7,0.46,0.48,0.5,0.56,0.78,0.73,im
0.12,0.43,0.48,0.5,0.63,0.7,0.74,im
0.61,0.52,0.48,0.5,0.54,0.67,0.52,im
0.33,0.37,0.48,0.5,0.46,0.65,0.69,im
0.63,0.65,0.48,0.5,0.66,0.67,0.71,im
0.41,0.51,0.48,0.5,0.53,0.75,0.78,im
0.34,0.67,0.48,0.5,0.52,0.76,0.79,im
0.58,0.34,0.48,0.5,0.56,0.87,0.81,im
0.59,0.56,0.48,0.5,0.55,0.8,0.82,im
0.51,0.4,0.48,0.5,0.57,0.62,0.67,im
0.5,0.57,0.48,0.5,0.71,0.61,0.66,im
0.6,0.46,0.48,0.5,0.45,0.81,0.83,im
0.37,0.47,0.48,0.5,0.39,0.76,0.79,im
0.58,0.55,0.48,0.5,0.57,0.7,0.74,im
0.36,0.47,0.48,0.5,0.51,0.69,0.72,im
0.39,0.41,0.48,0.5,0.52,0.72,0.75,im
0.35,0.51,0.48,0.5,0.61,0.71,0.74,im
0.31,0.44,0.48,0.5,0.5,0.79,0.82,im
0.61,0.66,0.48,0.5,0.46,0.87,0.88,im
0.48,0.49,0.48,0.5,0.52,0.77,0.71,im
0.11,0.5,0.48,0.5,0.58,0.72,0.68,im
0.31,0.36,0.48,0.5,0.58,0.94,0.94,im
0.68,0.51,0.48,0.5,0.71,0.75,0.78,im
0.69,0.39,0.48,0.5,0.57,0.76,0.79,im
0.52,0.54,0.48,0.5,0.62,0.76,0.79,im
0.46,0.59,0.48,0.5,0.36,0.76,0.23,im
0.36,0.45,0.48,0.5,0.38,0.79,0.17,im
0,0.51,0.48,0.5,0.35,0.67,0.44,im
0.1,0.49,0.48,0.5,0.41,0.67,0.21,im
0.3,0.51,0.48,0.5,0.42,0.61,0.34,im
0.61,0.47,0.48,0.5,0,0.8,0.32,im
0.63,0.75,0.48,0.5,0.64,0.73,0.66,im
0.71,0.52,0.48,0.5,0.64,1,0.99,im
0.85,0.53,0.48,0.5,0.53,0.52,0.35,imS
0.63,0.49,0.48,0.5,0.54,0.76,0.79,imS
0.75,0.55,1,1,0.4,0.47,0.3,imL
0.7,0.39,1,0.5,0.51,0.82,0.84,imL
0.72,0.42,0.48,0.5,0.65,0.77,0.79,imU
0.79,0.41,0.48,0.5,0.66,0.81,0.83,imU
0.83,0.48,0.48,0.5,0.65,0.76,0.79,imU
0.69,0.43,0.48,0.5,0.59,0.74,0.77,imU
0.79,0.36,0.48,0.5,0.46,0.82,0.7,imU
0.78,0.33,0.48,0.5,0.57,0.77,0.79,imU
0.75,0.37,0.48,0.5,0.64,0.7,0.74,imU
0.59,0.29,0.48,0.5,0.64,0.75,0.77,imU
0.67,0.37,0.48,0.5,0.54,0.64,0.68,imU
0.66,0.48,0.48,0.5,0.54,0.7,0.74,imU
0.64,0.46,0.48,0.5,0.48,0.73,0.76,imU
0.76,0.71,0.48,0.5,0.5,0.71,0.75,imU
0.84,0.49,0.48,0.5,0.55,0.78,0.74,imU
0.77,0.55,0.48,0.5,0.51,0.78,0.74,imU
0.81,0.44,0.48,0.5,0.42,0.67,0.68,imU
0.58,0.6,0.48,0.5,0.59,0.73,0.76,imU
0.63,0.42,0.48,0.5,0.48,0.77,0.8,imU
0.62,0.42,0.48,0.5,0.58,0.79,0.81,imU
0.86,0.39,0.48,0.5,0.59,0.89,0.9,imU
0.81,0.53,0.48,0.5,0.57,0.87,0.88,imU
0.87,0.49,0.48,0.5,0.61,0.76,0.79,imU
0.47,0.46,0.48,0.5,0.62,0.74,0.77,imU
0.76,0.41,0.48,0.5,0.5,0.59,0.62,imU
0.7,0.53,0.48,0.5,0.7,0.86,0.87,imU
0.64,0.45,0.48,0.5,0.67,0.61,0.66,imU
0.81,0.52,0.48,0.5,0.57,0.78,0.8,imU
0.73,0.26,0.48,0.5,0.57,0.75,0.78,imU
0.49,0.61,1,0.5,0.56,0.71,0.74,imU
0.88,0.42,0.48,0.5,0.52,0.73,0.75,imU
0.84,0.54,0.48,0.5,0.75,0.92,0.7,imU
0.63,0.51,0.48,0.5,0.64,0.72,0.76,imU
0.86,0.55,0.48,0.5,0.63,0.81,0.83,imU
0.79,0.54,0.48,0.5,0.5,0.66,0.68,imU
0.57,0.38,0.48,0.5,0.06,0.49,0.33,imU
0.78,0.44,0.48,0.5,0.45,0.73,0.68,imU
0.78,0.68,0.48,0.5,0.83,0.4,0.29,om
0.63,0.69,0.48,0.5,0.65,0.41,0.28,om
0.67,0.88,0.48,0.5,0.73,0.5,0.25,om
0.61,0.75,0.48,0.5,0.51,0.33,0.33,om
0.67,0.84,0.48,0.5,0.74,0.54,0.37,om
0.74,0.9,0.48,0.5,0.57,0.53,0.29,om
0.73,0.84,0.48,0.5,0.86,0.58,0.29,om
0.75,0.76,0.48,0.5,0.83,0.57,0.3,om
0.77,0.57,0.48,0.5,0.88,0.53,0.2,om
0.74,0.78,0.48,0.5,0.75,0.54,0.15,om
0.68,0.76,0.48,0.5,0.84,0.45,0.27,om
0.56,0.68,0.48,0.5,0.77,0.36,0.45,om
0.65,0.51,0.48,0.5,0.66,0.54,0.33,om
0.52,0.81,0.48,0.5,0.72,0.38,0.38,om
0.64,0.57,0.48,0.5,0.7,0.33,0.26,om
0.6,0.76,1,0.5,0.77,0.59,0.52,om
0.69,0.59,0.48,0.5,0.77,0.39,0.21,om
0.63,0.49,0.48,0.5,0.79,0.45,0.28,om
0.71,0.71,0.48,0.5,0.68,0.43,0.36,om
0.68,0.63,0.48,0.5,0.73,0.4,0.3,om
0.77,0.57,1,0.5,0.37,0.54,0.01,omL
0.66,0.49,1,0.5,0.54,0.56,0.36,omL
0.71,0.46,1,0.5,0.52,0.59,0.3,omL
0.67,0.55,1,0.5,0.66,0.58,0.16,omL
0.68,0.49,1,0.5,0.62,0.55,0.28,omL
0.74,0.49,0.48,0.5,0.42,0.54,0.36,pp
0.7,0.61,0.48,0.5,0.56,0.52,0.43,pp
0.66,0.86,0.48,0.5,0.34,0.41,0.36,pp
0.73,0.78,0.48,0.5,0.58,0.51,0.31,pp
0.65,0.57,0.48,0.5,0.47,0.47,0.51,pp
0.72,0.86,0.48,0.5,0.17,0.55,0.21,pp
0.67,0.7,0.48,0.5,0.46,0.45,0.33,pp
0.67,0.81,0.48,0.5,0.54,0.49,0.23,pp
0.67,0.61,0.48,0.5,0.51,0.37,0.38,pp
0.63,1,0.48,0.5,0.35,0.51,0.49,pp
0.57,0.59,0.48,0.5,0.39,0.47,0.33,pp
0.71,0.71,0.48,0.5,0.4,0.54,0.39,pp
0.66,0.74,0.48,0.5,0.31,0.38,0.43,pp
0.67,0.81,0.48,0.5,0.25,0.42,0.25,pp
0.64,0.72,0.48,0.5,0.49,0.42,0.19,pp
0.68,0.82,0.48,0.5,0.38,0.65,0.56,pp
0.32,0.39,0.48,0.5,0.53,0.28,0.38,pp
0.7,0.64,0.48,0.5,0.47,0.51,0.47,pp
0.63,0.57,0.48,0.5,0.49,0.7,0.2,pp
0.74,0.82,0.48,0.5,0.49,0.49,0.41,pp
0.63,0.86,0.48,0.5,0.39,0.47,0.34,pp
0.63,0.83,0.48,0.5,0.4,0.39,0.19,pp
0.63,0.71,0.48,0.5,0.6,0.4,0.39,pp
0.71,0.86,0.48,0.5,0.4,0.54,0.32,pp
0.68,0.78,0.48,0.5,0.43,0.44,0.42,pp
0.64,0.84,0.48,0.5,0.37,0.45,0.4,pp
0.74,0.47,0.48,0.5,0.5,0.57,0.42,pp
0.75,0.84,0.48,0.5,0.35,0.52,0.33,pp
0.63,0.65,0.48,0.5,0.39,0.44,0.35,pp
0.69,0.67,0.48,0.5,0.3,0.39,0.24,pp
0.7,0.71,0.48,0.5,0.42,0.84,0.85,pp
0.69,0.8,0.48,0.5,0.46,0.57,0.26,pp
0.64,0.66,0.48,0.5,0.41,0.39,0.2,pp
0.63,0.8,0.48,0.5,0.46,0.31,0.29,pp
0.66,0.71,0.48,0.5,0.41,0.5,0.35,pp
0.69,0.59,0.48,0.5,0.46,0.44,0.52,pp
0.68,0.67,0.48,0.5,0.49,0.4,0.34,pp
0.64,0.78,0.48,0.5,0.5,0.36,0.38,pp
0.62,0.78,0.48,0.5,0.47,0.49,0.54,pp
0.76,0.73,0.48,0.5,0.44,0.39,0.39,pp
0.64,0.81,0.48,0.5,0.37,0.39,0.44,pp
0.29,0.39,0.48,0.5,0.52,0.4,0.48,pp
0.62,0.83,0.48,0.5,0.46,0.36,0.4,pp
0.56,0.54,0.48,0.5,0.43,0.37,0.3,pp
0.69,0.66,0.48,0.5,0.41,0.5,0.25,pp
0.69,0.65,0.48,0.5,0.63,0.48,0.41,pp
0.43,0.59,0.48,0.5,0.52,0.49,0.56,pp
0.74,0.56,0.48,0.5,0.47,0.68,0.3,pp
0.71,0.57,0.48,0.5,0.48,0.35,0.32,pp
0.61,0.6,0.48,0.5,0.44,0.39,0.38,pp
0.59,0.61,0.48,0.5,0.42,0.42,0.37,pp
0.74,0.74,0.48,0.5,0.31,0.53,0.52,pp

332
tests/data/glass.arff Executable file
View File

@@ -0,0 +1,332 @@
% 1. Title: Glass Identification Database
%
% 2. Sources:
% (a) Creator: B. German
% -- Central Research Establishment
% Home Office Forensic Science Service
% Aldermaston, Reading, Berkshire RG7 4PN
% (b) Donor: Vina Spiehler, Ph.D., DABFT
% Diagnostic Products Corporation
% (213) 776-0180 (ext 3014)
% (c) Date: September, 1987
%
% 3. Past Usage:
% -- Rule Induction in Forensic Science
% -- Ian W. Evett and Ernest J. Spiehler
% -- Central Research Establishment
% Home Office Forensic Science Service
% Aldermaston, Reading, Berkshire RG7 4PN
% -- Unknown technical note number (sorry, not listed here)
% -- General Results: nearest neighbor held its own with respect to the
% rule-based system
%
% 4. Relevant Information:n
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
% Suite 206; Marina Del Ray, CA 90292 (213) 827-7890 and FAX: -3189.
% In determining whether the glass was a type of "float" glass or not,
% the following results were obtained (# incorrect answers):
%
% Type of Sample Beagle NN DA
% Windows that were float processed (87) 10 12 21
% Windows that were not: (76) 19 16 22
%
% The study of classification of types of glass was motivated by
% criminological investigation. At the scene of the crime, the glass left
% can be used as evidence...if it is correctly identified!
%
% 5. Number of Instances: 214
%
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
% -- all attributes are continuously valued
%
% 7. Attribute Information:
% 1. Id number: 1 to 214
% 2. RI: refractive index
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
% are attributes 4-10)
% 4. Mg: Magnesium
% 5. Al: Aluminum
% 6. Si: Silicon
% 7. K: Potassium
% 8. Ca: Calcium
% 9. Ba: Barium
% 10. Fe: Iron
% 11. Type of glass: (class attribute)
% -- 1 building_windows_float_processed
% -- 2 building_windows_non_float_processed
% -- 3 vehicle_windows_float_processed
% -- 4 vehicle_windows_non_float_processed (none in this database)
% -- 5 containers
% -- 6 tableware
% -- 7 headlamps
%
% 8. Missing Attribute Values: None
%
% Summary Statistics:
% Attribute: Min Max Mean SD Correlation with class
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
%
% 9. Class Distribution: (out of 214 total instances)
% -- 163 Window glass (building windows and vehicle windows)
% -- 87 float processed
% -- 70 building windows
% -- 17 vehicle windows
% -- 76 non-float processed
% -- 76 building windows
% -- 0 vehicle windows
% -- 51 Non-window glass
% -- 13 containers
% -- 9 tableware
% -- 29 headlamps
%
%
%
%
%
%
%
% Relabeled values in attribute 'Type'
% From: '1' To: 'build wind float'
% From: '2' To: 'build wind non-float'
% From: '3' To: 'vehic wind float'
% From: '4' To: 'vehic wind non-float'
% From: '5' To: containers
% From: '6' To: tableware
% From: '7' To: headlamps
%
@relation Glass
@attribute 'RI' real
@attribute 'Na' real
@attribute 'Mg' real
@attribute 'Al' real
@attribute 'Si' real
@attribute 'K' real
@attribute 'Ca' real
@attribute 'Ba' real
@attribute 'Fe' real
@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
@data
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware

225
tests/data/iris.arff Executable file
View File

@@ -0,0 +1,225 @@
% 1. Title: Iris Plants Database
%
% 2. Sources:
% (a) Creator: R.A. Fisher
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
% (c) Date: July, 1988
%
% 3. Past Usage:
% - Publications: too many to mention!!! Here are a few.
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
% to Mathematical Statistics" (John Wiley, NY, 1950).
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
% Structure and Classification Rule for Recognition in Partially Exposed
% Environments". IEEE Transactions on Pattern Analysis and Machine
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
% -- Results:
% -- very low misclassification rates (0% for the setosa class)
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
% Transactions on Information Theory, May 1972, 431-433.
% -- Results:
% -- very low misclassification rates again
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
% conceptual clustering system finds 3 classes in the data.
%
% 4. Relevant Information:
% --- This is perhaps the best known database to be found in the pattern
% recognition literature. Fisher's paper is a classic in the field
% and is referenced frequently to this day. (See Duda & Hart, for
% example.) The data set contains 3 classes of 50 instances each,
% where each class refers to a type of iris plant. One class is
% linearly separable from the other 2; the latter are NOT linearly
% separable from each other.
% --- Predicted attribute: class of iris plant.
% --- This is an exceedingly simple domain.
%
% 5. Number of Instances: 150 (50 in each of three classes)
%
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
%
% 7. Attribute Information:
% 1. sepal length in cm
% 2. sepal width in cm
% 3. petal length in cm
% 4. petal width in cm
% 5. class:
% -- Iris Setosa
% -- Iris Versicolour
% -- Iris Virginica
%
% 8. Missing Attribute Values: None
%
% Summary Statistics:
% Min Max Mean SD Class Correlation
% sepal length: 4.3 7.9 5.84 0.83 0.7826
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
%
% 9. Class Distribution: 33.3% for each of 3 classes.
@RELATION iris
@ATTRIBUTE sepallength REAL
@ATTRIBUTE sepalwidth REAL
@ATTRIBUTE petallength REAL
@ATTRIBUTE petalwidth REAL
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
@DATA
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
%
%
%

5
tests/data/iris.net Normal file
View File

@@ -0,0 +1,5 @@
class sepallength
class sepalwidth
class petallength
class petalwidth
petalwidth petallength

10177
tests/data/kdd_JapaneseVowels.arff Executable file

File diff suppressed because it is too large Load Diff

20191
tests/data/letter.arff Executable file

File diff suppressed because it is too large Load Diff

399
tests/data/liver-disorders.arff Executable file
View File

@@ -0,0 +1,399 @@
% 1. Title: BUPA liver disorders
%
% 2. Source information:
% -- Creators: BUPA Medical Research Ltd.
% -- Donor: Richard S. Forsyth
% 8 Grosvenor Avenue
% Mapperley Park
% Nottingham NG3 5DX
% 0602-621676
% -- Date: 5/15/1990
%
% 3. Past usage:
% -- None known other than what is shown in the PC/BEAGLE User's Guide
% (written by Richard S. Forsyth).
%
% 4. Relevant information:
% -- The first 5 variables are all blood tests which are thought
% to be sensitive to liver disorders that might arise from
% excessive alcohol consumption. Each line in the bupa.data file
% constitutes the record of a single male individual.
% -- It appears that drinks>5 is some sort of a selector on this database.
% See the PC/BEAGLE User's Guide for more information.
%
% 5. Number of instances: 345
%
% 6. Number of attributes: 7 overall
%
% 7. Attribute information:
% 1. mcv mean corpuscular volume
% 2. alkphos alkaline phosphotase
% 3. sgpt alamine aminotransferase
% 4. sgot aspartate aminotransferase
% 5. gammagt gamma-glutamyl transpeptidase
% 6. drinks number of half-pint equivalents of alcoholic beverages
% drunk per day
% 7. selector field used to split data into two sets
%
% 8. Missing values: none%
% Information about the dataset
% CLASSTYPE: nominal
% CLASSINDEX: last
%
@relation liver-disorders
@attribute mcv INTEGER
@attribute alkphos INTEGER
@attribute sgpt INTEGER
@attribute sgot INTEGER
@attribute gammagt INTEGER
@attribute drinks REAL
@attribute selector {1,2}
@data
85,92,45,27,31,0.0,1
85,64,59,32,23,0.0,2
86,54,33,16,54,0.0,2
91,78,34,24,36,0.0,2
87,70,12,28,10,0.0,2
98,55,13,17,17,0.0,2
88,62,20,17,9,0.5,1
88,67,21,11,11,0.5,1
92,54,22,20,7,0.5,1
90,60,25,19,5,0.5,1
89,52,13,24,15,0.5,1
82,62,17,17,15,0.5,1
90,64,61,32,13,0.5,1
86,77,25,19,18,0.5,1
96,67,29,20,11,0.5,1
91,78,20,31,18,0.5,1
89,67,23,16,10,0.5,1
89,79,17,17,16,0.5,1
91,107,20,20,56,0.5,1
94,116,11,33,11,0.5,1
92,59,35,13,19,0.5,1
93,23,35,20,20,0.5,1
90,60,23,27,5,0.5,1
96,68,18,19,19,0.5,1
84,80,47,33,97,0.5,1
92,70,24,13,26,0.5,1
90,47,28,15,18,0.5,1
88,66,20,21,10,0.5,1
91,102,17,13,19,0.5,1
87,41,31,19,16,0.5,1
86,79,28,16,17,0.5,1
91,57,31,23,42,0.5,1
93,77,32,18,29,0.5,1
88,96,28,21,40,0.5,1
94,65,22,18,11,0.5,1
91,72,155,68,82,0.5,2
85,54,47,33,22,0.5,2
79,39,14,19,9,0.5,2
85,85,25,26,30,0.5,2
89,63,24,20,38,0.5,2
84,92,68,37,44,0.5,2
89,68,26,39,42,0.5,2
89,101,18,25,13,0.5,2
86,84,18,14,16,0.5,2
85,65,25,14,18,0.5,2
88,61,19,21,13,0.5,2
92,56,14,16,10,0.5,2
95,50,29,25,50,0.5,2
91,75,24,22,11,0.5,2
83,40,29,25,38,0.5,2
89,74,19,23,16,0.5,2
85,64,24,22,11,0.5,2
92,57,64,36,90,0.5,2
94,48,11,23,43,0.5,2
87,52,21,19,30,0.5,2
85,65,23,29,15,0.5,2
84,82,21,21,19,0.5,2
88,49,20,22,19,0.5,2
96,67,26,26,36,0.5,2
90,63,24,24,24,0.5,2
90,45,33,34,27,0.5,2
90,72,14,15,18,0.5,2
91,55,4,8,13,0.5,2
91,52,15,22,11,0.5,2
87,71,32,19,27,1.0,1
89,77,26,20,19,1.0,1
89,67,5,17,14,1.0,2
85,51,26,24,23,1.0,2
103,75,19,30,13,1.0,2
90,63,16,21,14,1.0,2
90,63,29,23,57,2.0,1
90,67,35,19,35,2.0,1
87,66,27,22,9,2.0,1
90,73,34,21,22,2.0,1
86,54,20,21,16,2.0,1
90,80,19,14,42,2.0,1
87,90,43,28,156,2.0,2
96,72,28,19,30,2.0,2
91,55,9,25,16,2.0,2
95,78,27,25,30,2.0,2
92,101,34,30,64,2.0,2
89,51,41,22,48,2.0,2
91,99,42,33,16,2.0,2
94,58,21,18,26,2.0,2
92,60,30,27,297,2.0,2
94,58,21,18,26,2.0,2
88,47,33,26,29,2.0,2
92,65,17,25,9,2.0,2
92,79,22,20,11,3.0,1
84,83,20,25,7,3.0,1
88,68,27,21,26,3.0,1
86,48,20,20,6,3.0,1
99,69,45,32,30,3.0,1
88,66,23,12,15,3.0,1
89,62,42,30,20,3.0,1
90,51,23,17,27,3.0,1
81,61,32,37,53,3.0,2
89,89,23,18,104,3.0,2
89,65,26,18,36,3.0,2
92,75,26,26,24,3.0,2
85,59,25,20,25,3.0,2
92,61,18,13,81,3.0,2
89,63,22,27,10,4.0,1
90,84,18,23,13,4.0,1
88,95,25,19,14,4.0,1
89,35,27,29,17,4.0,1
91,80,37,23,27,4.0,1
91,109,33,15,18,4.0,1
91,65,17,5,7,4.0,1
88,107,29,20,50,4.0,2
87,76,22,55,9,4.0,2
87,86,28,23,21,4.0,2
87,42,26,23,17,4.0,2
88,80,24,25,17,4.0,2
90,96,34,49,169,4.0,2
86,67,11,15,8,4.0,2
92,40,19,20,21,4.0,2
85,60,17,21,14,4.0,2
89,90,15,17,25,4.0,2
91,57,15,16,16,4.0,2
96,55,48,39,42,4.0,2
79,101,17,27,23,4.0,2
90,134,14,20,14,4.0,2
89,76,14,21,24,4.0,2
88,93,29,27,31,4.0,2
90,67,10,16,16,4.0,2
92,73,24,21,48,4.0,2
91,55,28,28,82,4.0,2
83,45,19,21,13,4.0,2
90,74,19,14,22,4.0,2
92,66,21,16,33,5.0,1
93,63,26,18,18,5.0,1
86,78,47,39,107,5.0,2
97,44,113,45,150,5.0,2
87,59,15,19,12,5.0,2
86,44,21,11,15,5.0,2
87,64,16,20,24,5.0,2
92,57,21,23,22,5.0,2
90,70,25,23,112,5.0,2
99,59,17,19,11,5.0,2
92,80,10,26,20,6.0,1
95,60,26,22,28,6.0,1
91,63,25,26,15,6.0,1
92,62,37,21,36,6.0,1
95,50,13,14,15,6.0,1
90,76,37,19,50,6.0,1
96,70,70,26,36,6.0,1
95,62,64,42,76,6.0,1
92,62,20,23,20,6.0,1
91,63,25,26,15,6.0,1
82,56,67,38,92,6.0,2
92,82,27,24,37,6.0,2
90,63,12,26,21,6.0,2
88,37,9,15,16,6.0,2
100,60,29,23,76,6.0,2
98,43,35,23,69,6.0,2
91,74,87,50,67,6.0,2
92,87,57,25,44,6.0,2
93,99,36,34,48,6.0,2
90,72,17,19,19,6.0,2
97,93,21,20,68,6.0,2
93,50,18,25,17,6.0,2
90,57,20,26,33,6.0,2
92,76,31,28,41,6.0,2
88,55,19,17,14,6.0,2
89,63,24,29,29,6.0,2
92,79,70,32,84,7.0,1
92,93,58,35,120,7.0,1
93,84,58,47,62,7.0,2
97,71,29,22,52,8.0,1
84,99,33,19,26,8.0,1
96,44,42,23,73,8.0,1
90,62,22,21,21,8.0,1
92,94,18,17,6,8.0,1
90,67,77,39,114,8.0,1
97,71,29,22,52,8.0,1
91,69,25,25,66,8.0,2
93,59,17,20,14,8.0,2
92,95,85,48,200,8.0,2
90,50,26,22,53,8.0,2
91,62,59,47,60,8.0,2
92,93,22,28,123,9.0,1
92,77,86,41,31,10.0,1
86,66,22,24,26,10.0,2
98,57,31,34,73,10.0,2
95,80,50,64,55,10.0,2
92,108,53,33,94,12.0,2
97,92,22,28,49,12.0,2
93,77,39,37,108,16.0,1
94,83,81,34,201,20.0,1
87,75,25,21,14,0.0,1
88,56,23,18,12,0.0,1
84,97,41,20,32,0.0,2
94,91,27,20,15,0.5,1
97,62,17,13,5,0.5,1
92,85,25,20,12,0.5,1
82,48,27,15,12,0.5,1
88,74,31,25,15,0.5,1
95,77,30,14,21,0.5,1
88,94,26,18,8,0.5,1
91,70,19,19,22,0.5,1
83,54,27,15,12,0.5,1
91,105,40,26,56,0.5,1
86,79,37,28,14,0.5,1
91,96,35,22,135,0.5,1
89,82,23,14,35,0.5,1
90,73,24,23,11,0.5,1
90,87,19,25,19,0.5,1
89,82,33,32,18,0.5,1
85,79,17,8,9,0.5,1
85,119,30,26,17,0.5,1
78,69,24,18,31,0.5,1
88,107,34,21,27,0.5,1
89,115,17,27,7,0.5,1
92,67,23,15,12,0.5,1
89,101,27,34,14,0.5,1
91,84,11,12,10,0.5,1
94,101,41,20,53,0.5,2
88,46,29,22,18,0.5,2
88,122,35,29,42,0.5,2
84,88,28,25,35,0.5,2
90,79,18,15,24,0.5,2
87,69,22,26,11,0.5,2
65,63,19,20,14,0.5,2
90,64,12,17,14,0.5,2
85,58,18,24,16,0.5,2
88,81,41,27,36,0.5,2
86,78,52,29,62,0.5,2
82,74,38,28,48,0.5,2
86,58,36,27,59,0.5,2
94,56,30,18,27,0.5,2
87,57,30,30,22,0.5,2
98,74,148,75,159,0.5,2
94,75,20,25,38,0.5,2
83,68,17,20,71,0.5,2
93,56,25,21,33,0.5,2
101,65,18,21,22,0.5,2
92,65,25,20,31,0.5,2
92,58,14,16,13,0.5,2
86,58,16,23,23,0.5,2
85,62,15,13,22,0.5,2
86,57,13,20,13,0.5,2
86,54,26,30,13,0.5,2
81,41,33,27,34,1.0,1
91,67,32,26,13,1.0,1
91,80,21,19,14,1.0,1
92,60,23,15,19,1.0,1
91,60,32,14,8,1.0,1
93,65,28,22,10,1.0,1
90,63,45,24,85,1.0,2
87,92,21,22,37,1.0,2
83,78,31,19,115,1.0,2
95,62,24,23,14,1.0,2
93,59,41,30,48,1.0,2
84,82,43,32,38,2.0,1
87,71,33,20,22,2.0,1
86,44,24,15,18,2.0,1
86,66,28,24,21,2.0,1
88,58,31,17,17,2.0,1
90,61,28,29,31,2.0,1
88,69,70,24,64,2.0,1
93,87,18,17,26,2.0,1
98,58,33,21,28,2.0,1
91,44,18,18,23,2.0,2
87,75,37,19,70,2.0,2
94,91,30,26,25,2.0,2
88,85,14,15,10,2.0,2
89,109,26,25,27,2.0,2
87,59,37,27,34,2.0,2
93,58,20,23,18,2.0,2
88,57,9,15,16,2.0,2
94,65,38,27,17,3.0,1
91,71,12,22,11,3.0,1
90,55,20,20,16,3.0,1
91,64,21,17,26,3.0,2
88,47,35,26,33,3.0,2
82,72,31,20,84,3.0,2
85,58,83,49,51,3.0,2
91,54,25,22,35,4.0,1
98,50,27,25,53,4.0,2
86,62,29,21,26,4.0,2
89,48,32,22,14,4.0,2
82,68,20,22,9,4.0,2
83,70,17,19,23,4.0,2
96,70,21,26,21,4.0,2
94,117,77,56,52,4.0,2
93,45,11,14,21,4.0,2
93,49,27,21,29,4.0,2
84,73,46,32,39,4.0,2
91,63,17,17,46,4.0,2
90,57,31,18,37,4.0,2
87,45,19,13,16,4.0,2
91,68,14,20,19,4.0,2
86,55,29,35,108,4.0,2
91,86,52,47,52,4.0,2
88,46,15,33,55,4.0,2
85,52,22,23,34,4.0,2
89,72,33,27,55,4.0,2
95,59,23,18,19,4.0,2
94,43,154,82,121,4.0,2
96,56,38,26,23,5.0,2
90,52,10,17,12,5.0,2
94,45,20,16,12,5.0,2
99,42,14,21,49,5.0,2
93,102,47,23,37,5.0,2
94,71,25,26,31,5.0,2
92,73,33,34,115,5.0,2
87,54,41,29,23,6.0,1
92,67,15,14,14,6.0,1
98,101,31,26,32,6.0,1
92,53,51,33,92,6.0,1
97,94,43,43,82,6.0,1
93,43,11,16,54,6.0,1
93,68,24,18,19,6.0,1
95,36,38,19,15,6.0,1
99,86,58,42,203,6.0,1
98,66,103,57,114,6.0,1
92,80,10,26,20,6.0,1
96,74,27,25,43,6.0,2
95,93,21,27,47,6.0,2
86,109,16,22,28,6.0,2
91,46,30,24,39,7.0,2
102,82,34,78,203,7.0,2
85,50,12,18,14,7.0,2
91,57,33,23,12,8.0,1
91,52,76,32,24,8.0,1
93,70,46,30,33,8.0,1
87,55,36,19,25,8.0,1
98,123,28,24,31,8.0,1
82,55,18,23,44,8.0,2
95,73,20,25,225,8.0,2
97,80,17,20,53,8.0,2
100,83,25,24,28,8.0,2
88,91,56,35,126,9.0,2
91,138,45,21,48,10.0,1
92,41,37,22,37,10.0,1
86,123,20,25,23,10.0,2
91,93,35,34,37,10.0,2
87,87,15,23,11,10.0,2
87,56,52,43,55,10.0,2
99,75,26,24,41,12.0,1
96,69,53,43,203,12.0,2
98,77,55,35,89,15.0,1
91,68,27,26,14,16.0,1
98,99,57,45,65,20.0,1

2306
tests/data/mfeat-factors.arff Executable file

File diff suppressed because it is too large Load Diff

4811
tests/data/spambase.arff Executable file

File diff suppressed because it is too large Load Diff