Add testing indices

This commit is contained in:
2024-04-02 23:59:08 +02:00
parent 37316a54e0
commit 78f2f5b299
74 changed files with 26331 additions and 12 deletions

View File

@@ -4,10 +4,11 @@
#include "TestUtils.h"
#include "folding.hpp"
TEST_CASE("KFold Test", "[Platform][KFold]")
{
// Initialize a KFold object with k=5 and a seed of 19.
std::string file_name = GENERATE("iris", "diabetes");
std::string file_name = GENERATE("iris", "diabetes", "glass");
auto raw = RawDatasets(file_name, true);
int nFolds = 5;
folding::KFold kfold(nFolds, raw.nSamples, 19);
@@ -22,13 +23,19 @@ TEST_CASE("KFold Test", "[Platform][KFold]")
// Test each fold's size and contents.
for (int i = 0; i < nFolds; ++i) {
auto [train_indices, test_indices] = kfold.getFold(i);
// Store the indices
auto fname = "kfold_" + file_name + "_" + std::to_string(i) + ".csv";
auto indices = train_indices;
indices.insert(indices.end(), test_indices.begin(), test_indices.end());
//CSVFiles::write_csv(fname, indices);
auto expected_indices = CSVFiles::read_csv(fname);
REQUIRE(indices == expected_indices);
bool result = train_indices.size() == number || train_indices.size() == number + 1;
REQUIRE(result);
REQUIRE(train_indices.size() + test_indices.size() == raw.nSamples);
}
}
}
map<int, int> counts(std::vector<int> y, std::vector<int> indices)
{
map<int, int> result;
@@ -41,7 +48,7 @@ map<int, int> counts(std::vector<int> y, std::vector<int> indices)
TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]")
{
// Initialize a StratifiedKFold object with nFolds folds (3, 5 or 10), using the y std::vector, and a seed of 17.
std::string file_name = GENERATE("iris", "diabetes");
std::string file_name = GENERATE("iris", "diabetes", "glass");
int nFolds = GENERATE(3, 5, 10);
auto raw = RawDatasets(file_name, true);
folding::StratifiedKFold stratified_kfoldt(nFolds, raw.yt, 17);
@@ -66,6 +73,13 @@ TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]")
auto [train_indicesv, test_indicesv] = stratified_kfoldv.getFold(fold);
REQUIRE(train_indicest == train_indicesv);
REQUIRE(test_indicest == test_indicesv);
// Store the indices
auto fname = "stratkfold_" + file_name + "_" + std::to_string(nFolds) + "_" + std::to_string(fold) + ".csv";
auto indices = train_indicesv;
indices.insert(indices.end(), test_indicesv.begin(), test_indicesv.end());
//CSVFiles::write_csv(fname, indices);
auto expected_indices = CSVFiles::read_csv(fname);
REQUIRE(indices == expected_indices);
// In the worst-case scenario, the training set holds number + raw.classNumStates samples,
// because a single fold can receive one remainder sample from each class.
REQUIRE(train_indicest.size() <= number + raw.classNumStates);