// Platform/tests/TestAdaBoost.cpp

// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/matchers/catch_matchers_string.hpp>
#include <catch2/matchers/catch_matchers_vector.hpp>
#include <torch/torch.h>
#include <memory>
#include <stdexcept>
#include <cmath>
#include <nlohmann/json.hpp>
#include "experimental_clfs/AdaBoost.h"
#include "experimental_clfs/DecisionTree.h"
#include "experimental_clfs/TensorUtils.hpp"
#include "TestUtils.h"
using namespace bayesnet;
using namespace Catch::Matchers;
TEST_CASE("AdaBoost Construction", "[AdaBoost]")
{
SECTION("Default constructor")
{
REQUIRE_NOTHROW(AdaBoost());
}
SECTION("Constructor with parameters")
{
REQUIRE_NOTHROW(AdaBoost(100, 2));
}
SECTION("Constructor parameter access")
{
AdaBoost ada(75, 3);
REQUIRE(ada.getNEstimators() == 75);
REQUIRE(ada.getBaseMaxDepth() == 3);
}
}
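// A minimal end-to-end usage sketch (illustrative, not part of the original
// suite): construct, fit on a tiny discrete dataset using the same interface
// exercised throughout this file, then predict. Data and sizes are arbitrary.
TEST_CASE("AdaBoost Minimal Usage Sketch", "[AdaBoost][sketch]")
{
    std::vector<std::vector<int>> X = { {0, 1}, {0, 1} };
    std::vector<int> y = { 0, 1 };
    std::vector<std::string> features = { "f1", "f2" };
    std::map<std::string, std::vector<int>> states = {
        { "f1", { 0, 1 } }, { "f2", { 0, 1 } }, { "class", { 0, 1 } }
    };
    AdaBoost ada(3, 1);
    ada.fit(X, y, features, "class", states, Smoothing_t::NONE);
    REQUIRE(ada.predict(X).size() == 2);
}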
TEST_CASE("AdaBoost Hyperparameter Setting", "[AdaBoost]")
{
AdaBoost ada;
SECTION("Set individual hyperparameters")
{
REQUIRE_NOTHROW(ada.setNEstimators(100));
REQUIRE_NOTHROW(ada.setBaseMaxDepth(5));
REQUIRE(ada.getNEstimators() == 100);
REQUIRE(ada.getBaseMaxDepth() == 5);
}
SECTION("Set hyperparameters via JSON")
{
nlohmann::json params;
params["n_estimators"] = 80;
params["base_max_depth"] = 4;
REQUIRE_NOTHROW(ada.setHyperparameters(params));
}
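SECTION("Set hyperparameters from a JSON literal (sketch)")
{
    // Illustrative variant of the section above: nlohmann::json can also be
    // parsed from a string, mirroring how a configuration file would be read.
    // The key names are the same ones accepted above.
    auto params = nlohmann::json::parse(R"({"n_estimators": 60, "base_max_depth": 3})");
    REQUIRE_NOTHROW(ada.setHyperparameters(params));
}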
SECTION("Invalid hyperparameters should throw")
{
nlohmann::json params;
// Negative n_estimators
params["n_estimators"] = -1;
REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument);
// Zero n_estimators
params["n_estimators"] = 0;
REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument);
// Negative base_max_depth
params["n_estimators"] = 50;
params["base_max_depth"] = -1;
REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument);
// Zero base_max_depth
params["base_max_depth"] = 0;
REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument);
}
}
TEST_CASE("AdaBoost Basic Functionality", "[AdaBoost]")
{
// Create a simple dataset
int n_samples = 20;
int n_features = 2;
std::vector<std::vector<int>> X(n_features, std::vector<int>(n_samples));
std::vector<int> y(n_samples);
// Simple pattern: class depends on first feature
for (int i = 0; i < n_samples; i++) {
X[0][i] = i < 10 ? 0 : 1;
X[1][i] = i % 2;
y[i] = X[0][i]; // Class equals first feature
}
std::vector<std::string> features = { "f1", "f2" };
std::string className = "class";
std::map<std::string, std::vector<int>> states;
states["f1"] = { 0, 1 };
states["f2"] = { 0, 1 };
states["class"] = { 0, 1 };
SECTION("Training with vector interface")
{
AdaBoost ada(10, 3); // 10 estimators, max_depth = 3
REQUIRE_NOTHROW(ada.fit(X, y, features, className, states, Smoothing_t::NONE));
// Check that we have the expected number of models
auto weights = ada.getEstimatorWeights();
REQUIRE(weights.size() <= 10); // Should be <= n_estimators
REQUIRE(weights.size() > 0); // Should have at least one model
// Check training errors
auto errors = ada.getTrainingErrors();
REQUIRE(errors.size() == weights.size());
// All training errors should be less than 0.5 for this simple dataset
for (double error : errors) {
REQUIRE(error < 0.5);
REQUIRE(error >= 0.0);
}
}
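SECTION("SAMME estimator weight formula (illustrative)")
{
    // Hedged sketch: under the SAMME algorithm a weak learner with weighted
    // error err on a K-class problem receives weight
    //   alpha = log((1 - err) / err) + log(K - 1),
    // which reduces to the classic binary AdaBoost weight for K = 2. Whether
    // this implementation uses exactly this formula is an assumption, so the
    // expected alphas are only logged for inspection, not asserted.
    AdaBoost ada(10, 3);
    ada.fit(X, y, features, className, states, Smoothing_t::NONE);
    auto errors = ada.getTrainingErrors();
    auto weights = ada.getEstimatorWeights();
    REQUIRE(errors.size() == weights.size());
    const double K = 2.0; // two classes in this dataset
    for (size_t i = 0; i < errors.size(); i++) {
        double e = errors[i] < 1e-12 ? 1e-12 : errors[i]; // guard against log(0)
        double alpha_samme = std::log((1.0 - e) / e) + std::log(K - 1.0);
        INFO("Model " << i << ": error=" << errors[i]
            << ", stored weight=" << weights[i]
            << ", SAMME alpha if the formula applies=" << alpha_samme);
        REQUIRE(weights[i] >= 0.0);
    }
}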
SECTION("Prediction before fitting")
{
AdaBoost ada;
REQUIRE_THROWS_WITH(ada.predict(X),
ContainsSubstring("not been fitted"));
REQUIRE_THROWS_WITH(ada.predict_proba(X),
ContainsSubstring("not been fitted"));
}
SECTION("Prediction with vector interface")
{
AdaBoost ada(10, 3);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto predictions = ada.predict(X);
REQUIRE(predictions.size() == static_cast<size_t>(n_samples));
// Check accuracy
int correct = 0;
for (size_t i = 0; i < predictions.size(); i++) {
if (predictions[i] == y[i]) correct++;
}
double accuracy = static_cast<double>(correct) / n_samples;
REQUIRE(accuracy > 0.99); // Should be essentially perfect on this separable dataset
auto accuracy_computed = ada.score(X, y);
REQUIRE(accuracy_computed == Catch::Approx(accuracy).epsilon(1e-6));
}
SECTION("Probability predictions with vector interface")
{
AdaBoost ada(10, 3);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto proba = ada.predict_proba(X);
REQUIRE(proba.size() == static_cast<size_t>(n_samples));
REQUIRE(proba[0].size() == 2); // Two classes
// Check probabilities sum to 1 and are valid
auto predictions = ada.predict(X);
int correct = 0;
for (size_t i = 0; i < proba.size(); i++) {
auto p = proba[i];
auto pred = predictions[i];
REQUIRE(p.size() == 2);
REQUIRE(p[0] >= 0.0);
REQUIRE(p[1] >= 0.0);
double sum = p[0] + p[1];
REQUIRE(sum == Catch::Approx(1.0).epsilon(1e-6));
// compute the predicted class based on probabilities
auto predicted_class = (p[0] > p[1]) ? 0 : 1;
// compute accuracy based on predictions
if (predicted_class == y[i]) {
correct++;
}
// Check that predict_proba matches the expected predict value
REQUIRE(pred == (p[0] > p[1] ? 0 : 1));
}
double accuracy = static_cast<double>(correct) / n_samples;
REQUIRE(accuracy > 0.99); // Should be essentially perfect on this separable dataset
}
}
TEST_CASE("AdaBoost Tensor Interface", "[AdaBoost]")
{
auto raw = RawDatasets("iris", true);
SECTION("Training with tensor format")
{
AdaBoost ada(20, 3);
INFO("Dataset shape: " << raw.dataset.sizes());
INFO("Features: " << raw.featurest.size());
INFO("Samples: " << raw.nSamples);
// AdaBoost expects dataset in format: features x samples, with labels as last row
REQUIRE_NOTHROW(ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE));
// Test prediction with tensor
auto predictions = ada.predict(raw.Xt);
REQUIRE(predictions.size(0) == raw.yt.size(0));
// Calculate accuracy
auto correct = torch::sum(predictions == raw.yt).item<int>();
double accuracy = static_cast<double>(correct) / raw.yt.size(0);
auto accuracy_computed = ada.score(raw.Xt, raw.yt);
REQUIRE(accuracy_computed == Catch::Approx(accuracy).epsilon(1e-6));
REQUIRE(accuracy > 0.97); // Should achieve good accuracy on Iris
// Test probability predictions with tensor
auto proba = ada.predict_proba(raw.Xt);
REQUIRE(proba.size(0) == raw.yt.size(0));
REQUIRE(proba.size(1) == 3); // Three classes in Iris
// Check probabilities sum to 1
auto prob_sums = torch::sum(proba, 1);
for (int i = 0; i < prob_sums.size(0); i++) {
REQUIRE(prob_sums[i].item<double>() == Catch::Approx(1.0).epsilon(1e-6));
}
}
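SECTION("Dataset tensor layout (sketch)")
{
    // Illustrative only: the fit() overload used above takes a single tensor
    // of shape (n_features + 1) x n_samples, with the class labels stacked
    // as the last row, matching the layout noted in the section above.
    auto X = torch::tensor({ {0, 0, 1, 1}, {0, 1, 0, 1} }, torch::kInt32);
    auto y = torch::tensor({ {0, 0, 1, 1} }, torch::kInt32);
    auto dataset = torch::cat({ X, y }, 0);
    REQUIRE(dataset.size(0) == 3); // two feature rows + one label row
    REQUIRE(dataset.size(1) == 4); // four samples
}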
}
TEST_CASE("AdaBoost on Iris Dataset", "[AdaBoost][iris]")
{
auto raw = RawDatasets("iris", true);
SECTION("Training with vector interface")
{
AdaBoost ada(30, 3);
REQUIRE_NOTHROW(ada.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv, Smoothing_t::NONE));
auto predictions = ada.predict(raw.Xv);
REQUIRE(predictions.size() == raw.yv.size());
// Calculate accuracy
int correct = 0;
for (size_t i = 0; i < predictions.size(); i++) {
if (predictions[i] == raw.yv[i]) correct++;
}
double accuracy = static_cast<double>(correct) / raw.yv.size();
REQUIRE(accuracy > 0.85); // Should achieve good accuracy
// Test probability predictions
auto proba = ada.predict_proba(raw.Xv);
REQUIRE(proba.size() == raw.yv.size());
REQUIRE(proba[0].size() == 3); // Three classes
// Verify estimator weights and errors
auto weights = ada.getEstimatorWeights();
auto errors = ada.getTrainingErrors();
REQUIRE(weights.size() == errors.size());
REQUIRE(weights.size() > 0);
// All estimator weights should be non-negative
for (double w : weights) {
REQUIRE(w >= 0.0);
}
// All errors should be less than 0.5 (better than random)
for (double e : errors) {
REQUIRE(e < 0.5);
REQUIRE(e >= 0.0);
}
}
SECTION("Different number of estimators")
{
std::vector<int> n_estimators = { 5, 15, 25 };
for (int n_est : n_estimators) {
AdaBoost ada(n_est, 2);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
REQUIRE(predictions.size(0) == raw.yt.size(0));
// Check that we don't exceed the specified number of estimators
auto weights = ada.getEstimatorWeights();
REQUIRE(static_cast<int>(weights.size()) <= n_est);
}
}
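SECTION("Early stopping invariant (sketch)")
{
    // Hedged sketch: fewer models than n_estimators is expected when
    // boosting stops early (e.g. on zero training error, exercised in the
    // Edge Cases test below); only the invariant itself is checked here.
    AdaBoost ada(50, 1);
    ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
    REQUIRE(ada.getEstimatorWeights().size() <= 50);
}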
SECTION("Different base estimator depths")
{
std::vector<int> depths = { 1, 2, 4 };
for (int depth : depths) {
AdaBoost ada(15, depth);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
REQUIRE(predictions.size(0) == raw.yt.size(0));
}
}
}
TEST_CASE("AdaBoost Edge Cases", "[AdaBoost]")
{
auto raw = RawDatasets("iris", true);
SECTION("Single estimator (depth 1 stump)")
{
AdaBoost ada(1, 1); // Single decision stump
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
REQUIRE(predictions.size(0) == raw.yt.size(0));
auto weights = ada.getEstimatorWeights();
REQUIRE(weights.size() == 1);
}
SECTION("Perfect classifier scenario")
{
// Create a perfectly separable dataset
std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
std::vector<int> y = { 0, 0, 1, 1 };
std::vector<std::string> features = { "f1", "f2" };
std::string className = "class";
std::map<std::string, std::vector<int>> states;
states["f1"] = { 0, 1 };
states["f2"] = { 0, 1 };
states["class"] = { 0, 1 };
AdaBoost ada(10, 3);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto predictions = ada.predict(X);
REQUIRE(predictions.size() == 4);
// Should achieve perfect accuracy
int correct = 0;
for (size_t i = 0; i < predictions.size(); i++) {
if (predictions[i] == y[i]) correct++;
}
REQUIRE(correct == 4);
// Should stop early due to perfect classification
auto errors = ada.getTrainingErrors();
if (errors.size() > 0) {
REQUIRE(errors.back() < 1e-10); // Very low error
}
}
SECTION("Small dataset")
{
// Very small dataset
std::vector<std::vector<int>> X = { {0,1}, {1,0} };
std::vector<int> y = { 0, 1 };
std::vector<std::string> features = { "f1", "f2" };
std::string className = "class";
std::map<std::string, std::vector<int>> states;
states["f1"] = { 0, 1 };
states["f2"] = { 0, 1 };
states["class"] = { 0, 1 };
AdaBoost ada(5, 1);
REQUIRE_NOTHROW(ada.fit(X, y, features, className, states, Smoothing_t::NONE));
auto predictions = ada.predict(X);
REQUIRE(predictions.size() == 2);
}
}
TEST_CASE("AdaBoost Graph Visualization", "[AdaBoost]")
{
// Simple dataset for visualization
std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
std::vector<int> y = { 0, 1, 1, 0 }; // XOR pattern
std::vector<std::string> features = { "x1", "x2" };
std::string className = "xor";
std::map<std::string, std::vector<int>> states;
states["x1"] = { 0, 1 };
states["x2"] = { 0, 1 };
states["xor"] = { 0, 1 };
SECTION("Graph generation")
{
AdaBoost ada(5, 2);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto graph_lines = ada.graph();
REQUIRE(graph_lines.size() > 2);
REQUIRE(graph_lines.front() == "digraph AdaBoost {");
REQUIRE(graph_lines.back() == "}");
// Should contain base estimator references
bool has_estimators = false;
for (const auto& line : graph_lines) {
if (line.find("Estimator") != std::string::npos) {
has_estimators = true;
break;
}
}
REQUIRE(has_estimators);
// Should contain alpha values
bool has_alpha = false;
for (const auto& line : graph_lines) {
if (line.find("α") != std::string::npos || line.find("alpha") != std::string::npos) {
has_alpha = true;
break;
}
}
REQUIRE(has_alpha);
}
SECTION("Graph with title")
{
AdaBoost ada(3, 1);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto graph_lines = ada.graph("XOR AdaBoost");
bool has_title = false;
for (const auto& line : graph_lines) {
if (line.find("label=\"XOR AdaBoost\"") != std::string::npos) {
has_title = true;
break;
}
}
REQUIRE(has_title);
}
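SECTION("Rendering the DOT output (sketch)")
{
    // Illustrative: the lines returned by graph() form a Graphviz DOT
    // document, so joining them yields text that could be piped to
    // `dot -Tpng` for rendering. Only the document framing is checked.
    AdaBoost ada(3, 1);
    ada.fit(X, y, features, className, states, Smoothing_t::NONE);
    std::string dot;
    for (const auto& line : ada.graph("XOR AdaBoost")) {
        dot += line + "\n";
    }
    REQUIRE_THAT(dot, StartsWith("digraph AdaBoost {"));
}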
}
TEST_CASE("AdaBoost with Weights", "[AdaBoost]")
{
auto raw = RawDatasets("iris", true);
SECTION("Uniform weights")
{
AdaBoost ada(20, 3);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, raw.weights, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
REQUIRE(predictions.size(0) == raw.yt.size(0));
auto weights = ada.getEstimatorWeights();
REQUIRE(weights.size() > 0);
}
SECTION("Non-uniform weights")
{
auto weights = torch::ones({ raw.nSamples });
weights.index({ torch::indexing::Slice(0, 50) }) *= 3.0; // Emphasize first class
weights = weights / weights.sum();
AdaBoost ada(15, 2);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, weights, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
REQUIRE(predictions.size(0) == raw.yt.size(0));
// Check that training completed successfully
auto estimator_weights = ada.getEstimatorWeights();
auto errors = ada.getTrainingErrors();
REQUIRE(estimator_weights.size() == errors.size());
REQUIRE(estimator_weights.size() > 0);
}
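SECTION("Weight normalization sketch")
{
    // Illustrative only: normalizing sample weights to sum to 1, as done
    // manually in the section above, keeps them on a common scale
    // regardless of the raw magnitudes used to emphasize samples.
    auto w = torch::ones({ raw.nSamples });
    w.index({ torch::indexing::Slice(0, 50) }) *= 3.0;
    w = w / w.sum();
    REQUIRE(w.sum().item<double>() == Catch::Approx(1.0).epsilon(1e-9));
}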
}
TEST_CASE("AdaBoost Input Dimension Validation", "[AdaBoost]")
{
auto raw = RawDatasets("iris", true);
SECTION("Correct input dimensions")
{
AdaBoost ada(10, 2);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
// Test with correct tensor dimensions (features x samples)
REQUIRE_NOTHROW(ada.predict(raw.Xt));
REQUIRE_NOTHROW(ada.predict_proba(raw.Xt));
// Test with correct vector dimensions (features x samples)
REQUIRE_NOTHROW(ada.predict(raw.Xv));
REQUIRE_NOTHROW(ada.predict_proba(raw.Xv));
}
SECTION("Dimension consistency between interfaces")
{
AdaBoost ada(10, 2);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
// Get predictions from both interfaces
auto tensor_predictions = ada.predict(raw.Xt);
auto vector_predictions = ada.predict(raw.Xv);
// Should have same number of predictions
REQUIRE(tensor_predictions.size(0) == static_cast<int>(vector_predictions.size()));
// Test probability predictions
auto tensor_proba = ada.predict_proba(raw.Xt);
auto vector_proba = ada.predict_proba(raw.Xv);
REQUIRE(tensor_proba.size(0) == static_cast<int>(vector_proba.size()));
REQUIRE(tensor_proba.size(1) == static_cast<int>(vector_proba[0].size()));
// Verify predictions match between interfaces
for (int i = 0; i < tensor_predictions.size(0); i++) {
REQUIRE(tensor_predictions[i].item<int>() == vector_predictions[i]);
// Verify probabilities match between interfaces
for (int j = 0; j < tensor_proba.size(1); j++) {
REQUIRE(tensor_proba[i][j].item<double>() == Catch::Approx(vector_proba[i][j]).epsilon(1e-10));
}
}
}
}
TEST_CASE("AdaBoost Debug - Simple Dataset Analysis", "[AdaBoost][debug]")
{
// Recreate the simple dataset that originally exposed a failure
int n_samples = 20;
int n_features = 2;
std::vector<std::vector<int>> X(n_features, std::vector<int>(n_samples));
std::vector<int> y(n_samples);
// Simple pattern: class depends on first feature
for (int i = 0; i < n_samples; i++) {
X[0][i] = i < 10 ? 0 : 1;
X[1][i] = i % 2;
y[i] = X[0][i]; // Class equals first feature
}
std::vector<std::string> features = { "f1", "f2" };
std::string className = "class";
std::map<std::string, std::vector<int>> states;
states["f1"] = { 0, 1 };
states["f2"] = { 0, 1 };
states["class"] = { 0, 1 };
SECTION("Debug training process")
{
AdaBoost ada(5, 3); // Few estimators for debugging
// This should work perfectly on this simple dataset
REQUIRE_NOTHROW(ada.fit(X, y, features, className, states, Smoothing_t::NONE));
// Get training details
auto weights = ada.getEstimatorWeights();
auto errors = ada.getTrainingErrors();
INFO("Number of models trained: " << weights.size());
INFO("Training errors: ");
for (size_t i = 0; i < errors.size(); i++) {
INFO(" Model " << i << ": error=" << errors[i] << ", weight=" << weights[i]);
}
// Should have at least one model
REQUIRE(weights.size() > 0);
REQUIRE(errors.size() == weights.size());
// All training errors should be reasonable for this simple dataset
for (double error : errors) {
REQUIRE(error >= 0.0);
REQUIRE(error < 0.5); // Should be better than random
}
// Test predictions
auto predictions = ada.predict(X);
REQUIRE(predictions.size() == static_cast<size_t>(n_samples));
// Calculate accuracy
int correct = 0;
for (size_t i = 0; i < predictions.size(); i++) {
if (predictions[i] == y[i]) correct++;
INFO("Sample " << i << ": predicted=" << predictions[i] << ", actual=" << y[i]);
}
double accuracy = static_cast<double>(correct) / n_samples;
INFO("Accuracy: " << accuracy);
// Should achieve high accuracy on this perfectly separable dataset
REQUIRE(accuracy >= 0.9); // Lower threshold for debugging
// Test probability predictions
auto proba = ada.predict_proba(X);
REQUIRE(proba.size() == static_cast<size_t>(n_samples));
// Verify probabilities are valid
for (size_t i = 0; i < proba.size(); i++) {
auto p = proba[i];
REQUIRE(p.size() == 2);
REQUIRE(p[0] >= 0.0);
REQUIRE(p[1] >= 0.0);
double sum = p[0] + p[1];
REQUIRE(sum == Catch::Approx(1.0).epsilon(1e-6));
// Predicted class should match highest probability
int pred_class = predictions[i];
REQUIRE(pred_class == (p[0] > p[1] ? 0 : 1));
}
}
SECTION("Compare with single DecisionTree")
{
// Test that AdaBoost performs at least as well as a single tree
DecisionTree single_tree(3, 2, 1);
single_tree.fit(X, y, features, className, states, Smoothing_t::NONE);
auto tree_predictions = single_tree.predict(X);
int tree_correct = 0;
for (size_t i = 0; i < tree_predictions.size(); i++) {
if (tree_predictions[i] == y[i]) tree_correct++;
}
double tree_accuracy = static_cast<double>(tree_correct) / n_samples;
AdaBoost ada(5, 3);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto ada_predictions = ada.predict(X);
int ada_correct = 0;
for (size_t i = 0; i < ada_predictions.size(); i++) {
if (ada_predictions[i] == y[i]) ada_correct++;
}
double ada_accuracy = static_cast<double>(ada_correct) / n_samples;
INFO("DecisionTree accuracy: " << tree_accuracy);
INFO("AdaBoost accuracy: " << ada_accuracy);
// AdaBoost should perform at least as well as single tree
// (allowing small tolerance for numerical differences)
REQUIRE(ada_accuracy >= tree_accuracy - 0.1);
}
}
TEST_CASE("AdaBoost SAMME Algorithm Validation", "[AdaBoost]")
{
auto raw = RawDatasets("iris", true);
SECTION("Prediction consistency with probabilities")
{
AdaBoost ada(15, 3);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
auto probabilities = ada.predict_proba(raw.Xt);
REQUIRE(predictions.size(0) == probabilities.size(0));
REQUIRE(probabilities.size(1) == 3); // Three classes in Iris
// For each sample, predicted class should correspond to highest probability
for (int i = 0; i < predictions.size(0); i++) {
int predicted_class = predictions[i].item<int>();
auto probs = probabilities[i];
// Find class with highest probability
auto max_prob_idx = torch::argmax(probs).item<int>();
// Predicted class should match class with highest probability
REQUIRE(predicted_class == max_prob_idx);
// Probabilities should sum to 1
double sum_probs = torch::sum(probs).item<double>();
REQUIRE(sum_probs == Catch::Approx(1.0).epsilon(1e-6));
// All probabilities should be non-negative
for (int j = 0; j < 3; j++) {
REQUIRE(probs[j].item<double>() >= 0.0);
REQUIRE(probs[j].item<double>() <= 1.0);
}
}
}
SECTION("Weighted voting verification")
{
// Simple dataset where we can verify the weighted voting
std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
std::vector<int> y = { 0, 1, 1, 0 };
std::vector<std::string> features = { "f1", "f2" };
std::string className = "class";
std::map<std::string, std::vector<int>> states;
states["f1"] = { 0, 1 };
states["f2"] = { 0, 1 };
states["class"] = { 0, 1 };
AdaBoost ada(5, 2);
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto predictions = ada.predict(X);
auto probabilities = ada.predict_proba(X);
auto alphas = ada.getEstimatorWeights();
REQUIRE(predictions.size() == 4);
REQUIRE(probabilities.size() == 4);
REQUIRE(probabilities[0].size() == 2); // Two classes
REQUIRE(alphas.size() > 0);
// Verify that estimator weights are reasonable
for (double alpha : alphas) {
REQUIRE(alpha >= 0.0); // Alphas should be non-negative
}
// Verify prediction-probability consistency
for (size_t i = 0; i < predictions.size(); i++) {
int pred = predictions[i];
auto probs = probabilities[i];
INFO("Sample " << i << ": predicted=" << pred
<< ", probabilities=[" << probs[0] << ", " << probs[1] << "]");
REQUIRE(pred == (probs[0] > probs[1] ? 0 : 1));
REQUIRE(probs[0] + probs[1] == Catch::Approx(1.0).epsilon(1e-6));
}
}
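SECTION("Manual weighted vote sketch")
{
    // Hedged sketch of SAMME-style aggregation, assuming predict() returns
    // the class with the largest alpha-weighted vote: each estimator m adds
    // its weight alpha_m to the score of the class it predicts, and the
    // ensemble returns the argmax. Votes and alphas below are made up.
    std::vector<double> alphas_demo = { 0.8, 0.5, 0.3 };
    std::vector<int> votes = { 0, 1, 0 }; // per-estimator predicted class
    std::vector<double> score(2, 0.0);
    for (size_t m = 0; m < alphas_demo.size(); m++) {
        score[votes[m]] += alphas_demo[m];
    }
    int predicted = score[0] > score[1] ? 0 : 1;
    REQUIRE(predicted == 0); // 0.8 + 0.3 > 0.5
}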
SECTION("Empty models edge case")
{
AdaBoost ada(1, 1);
// Try to predict before fitting
std::vector<std::vector<int>> X = { {0}, {1} };
REQUIRE_THROWS_WITH(ada.predict(X), ContainsSubstring("not been fitted"));
REQUIRE_THROWS_WITH(ada.predict_proba(X), ContainsSubstring("not been fitted"));
}
}
TEST_CASE("AdaBoost Predict-Proba Consistency Fix", "[AdaBoost][consistency]")
{
// Simple binary classification dataset
std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
std::vector<int> y = { 0, 0, 1, 1 };
std::vector<std::string> features = { "f1", "f2" };
std::string className = "class";
std::map<std::string, std::vector<int>> states;
states["f1"] = { 0, 1 };
states["f2"] = { 0, 1 };
states["class"] = { 0, 1 };
SECTION("Binary classification consistency")
{
AdaBoost ada(3, 2);
ada.setDebug(true); // Enable debug output
ada.fit(X, y, features, className, states, Smoothing_t::NONE);
auto predictions = ada.predict(X);
auto probabilities = ada.predict_proba(X);
INFO("=== Debugging predict vs predict_proba consistency ===");
// Verify consistency for each sample
for (size_t i = 0; i < predictions.size(); i++) {
int predicted_class = predictions[i];
auto probs = probabilities[i];
INFO("Sample " << i << ":");
INFO(" True class: " << y[i]);
INFO(" Predicted class: " << predicted_class);
INFO(" Probabilities: [" << probs[0] << ", " << probs[1] << "]");
// The predicted class should be the one with highest probability
int max_prob_class = (probs[0] > probs[1]) ? 0 : 1;
INFO(" Max prob class: " << max_prob_class);
REQUIRE(predicted_class == max_prob_class);
// Probabilities should sum to 1
double sum_probs = probs[0] + probs[1];
REQUIRE(sum_probs == Catch::Approx(1.0).epsilon(1e-6));
// All probabilities should be valid
REQUIRE(probs[0] >= 0.0);
REQUIRE(probs[1] >= 0.0);
REQUIRE(probs[0] <= 1.0);
REQUIRE(probs[1] <= 1.0);
}
}
SECTION("Multi-class consistency")
{
auto raw = RawDatasets("iris", true);
AdaBoost ada(5, 2);
ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
auto predictions = ada.predict(raw.Xt);
auto probabilities = ada.predict_proba(raw.Xt);
// Check consistency for first 10 samples
for (int i = 0; i < std::min(static_cast<int64_t>(10), predictions.size(0)); i++) {
int predicted_class = predictions[i].item<int>();
auto probs = probabilities[i];
// Find class with maximum probability
auto max_prob_idx = torch::argmax(probs).item<int>();
INFO("Sample " << i << ":");
INFO(" Predicted class: " << predicted_class);
INFO(" Max prob class: " << max_prob_idx);
INFO(" Probabilities: [" << probs[0].item<float>() << ", "
<< probs[1].item<float>() << ", " << probs[2].item<float>() << "]");
// They must match
REQUIRE(predicted_class == max_prob_idx);
// Probabilities should sum to 1
double sum_probs = torch::sum(probs).item<double>();
REQUIRE(sum_probs == Catch::Approx(1.0).epsilon(1e-6));
}
}
}