diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 54bc2de..66bcbdd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,7 +21,7 @@ add_executable(
     experimental_clfs/XA1DE.cpp
     experimental_clfs/ExpClf.cpp
     experimental_clfs/DecisionTree.cpp
-
+    experimental_clfs/AdaBoost.cpp
 )
 target_link_libraries(b_best Boost::boost "${PyClassifiers}" bayesnet::bayesnet fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" Boost::python Boost::numpy "${XLSXWRITER_LIB}")
@@ -36,6 +36,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
     experimental_clfs/XA1DE.cpp
     experimental_clfs/ExpClf.cpp
     experimental_clfs/DecisionTree.cpp
+    experimental_clfs/AdaBoost.cpp
 )
 target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" bayesnet::bayesnet fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" Boost::python Boost::numpy)
@@ -48,7 +49,7 @@ add_executable(b_list commands/b_list.cpp
     experimental_clfs/XA1DE.cpp
     experimental_clfs/ExpClf.cpp
     experimental_clfs/DecisionTree.cpp
-
+    experimental_clfs/AdaBoost.cpp
 )
 target_link_libraries(b_list "${PyClassifiers}" bayesnet::bayesnet fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" Boost::python Boost::numpy "${XLSXWRITER_LIB}")
@@ -63,7 +64,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
     experimental_clfs/XA1DE.cpp
     experimental_clfs/ExpClf.cpp
     experimental_clfs/DecisionTree.cpp
-
+    experimental_clfs/AdaBoost.cpp
 )
 target_link_libraries(b_main PRIVATE nlohmann_json::nlohmann_json "${PyClassifiers}" bayesnet::bayesnet fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" Boost::python Boost::numpy)
diff --git a/src/experimental_clfs/AdaBoost.cpp b/src/experimental_clfs/AdaBoost.cpp
index ff5c40e..a04236d 100644
--- a/src/experimental_clfs/AdaBoost.cpp
+++ b/src/experimental_clfs/AdaBoost.cpp
@@ -11,11 +11,12 @@
 #include <algorithm>
 #include <cmath>
 #include <iostream>
+#include "TensorUtils.hpp"
 
 namespace bayesnet {
 
     AdaBoost::AdaBoost(int n_estimators, int max_depth)
-        : Ensemble(true), n_estimators(n_estimators), base_max_depth(max_depth)
+        : Ensemble(true), n_estimators(n_estimators), base_max_depth(max_depth), n(0), n_classes(0)
     {
         validHyperparameters = { "n_estimators", "base_max_depth" };
     }
@@ -27,6 +28,10 @@ namespace bayesnet {
         alphas.clear();
         training_errors.clear();
 
+        // Initialize n (number of features) and n_classes
+        n = dataset.size(0) - 1; // Exclude the label row
+        n_classes = states[className].size();
+
         // Initialize sample weights uniformly
         int n_samples = dataset.size(1);
         sample_weights = torch::ones({ n_samples }) / n_samples;
@@ -37,6 +42,12 @@ namespace bayesnet {
             normalizeWeights();
         }
 
+        // Debug information
+        std::cout << "Starting AdaBoost training with " << n_estimators << " estimators" << std::endl;
+        std::cout << "Number of classes: " << n_classes << std::endl;
+        std::cout << "Number of features: " << n << std::endl;
+        std::cout << "Number of samples: " << n_samples << std::endl;
+
         // Main AdaBoost training loop (SAMME algorithm)
         for (int iter = 0; iter < n_estimators; ++iter) {
             // Train base estimator with current sample weights
             auto estimator = trainBaseEstimator(sample_weights);
 
             // Calculate weighted error
             double weighted_error = calculateWeightedError(estimator.get(), sample_weights);
             training_errors.push_back(weighted_error);
 
+            // Debug output
+            std::cout << "Iteration " << iter + 1 << ":" << std::endl;
+            std::cout << "  Weighted error: " << weighted_error << std::endl;
+
             // Check if error is too high (worse than random guessing)
-            double random_guess_error = 1.0 - (1.0 / getClassNumStates());
+            double random_guess_error = 1.0 - (1.0 / n_classes);
+
+            // According to SAMME, we need error < random_guess_error
             if (weighted_error >= random_guess_error) {
+                std::cout << "  Error >= random guess (" << random_guess_error << "), stopping" << std::endl;
                 // If only one estimator and it's worse than random, keep it with zero weight
                 if (models.empty()) {
                     models.push_back(std::move(estimator));
@@ -60,7 +78,9 @@ namespace bayesnet {
             // Calculate alpha (estimator weight) using SAMME formula
             // alpha = log((1 - err) / err) + log(K - 1)
             double alpha = std::log((1.0 - weighted_error) / weighted_error) +
-                std::log(static_cast<double>(getClassNumStates() - 1));
+                std::log(static_cast<double>(n_classes - 1));
+
+            std::cout << "  Alpha: " << alpha << std::endl;
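+
+            // Illustrative arithmetic (editor's note, numbers not from a run):
+            // with n_classes = 3 the random-guess threshold is 1 - 1/3 ≈ 0.667,
+            // and a weighted_error of 0.2 yields
+            //   alpha = log(0.8 / 0.2) + log(3 - 1) ≈ 1.386 + 0.693 ≈ 2.079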
 
             // Store the estimator and its weight
             models.push_back(std::move(estimator));
@@ -74,42 +94,54 @@ namespace bayesnet {
 
             // Check for perfect classification
             if (weighted_error < 1e-10) {
+                std::cout << "  Perfect classification achieved, stopping" << std::endl;
                 break;
             }
         }
 
         // Set the number of models actually trained
         n_models = models.size();
+        std::cout << "AdaBoost training completed with " << n_models << " models" << std::endl;
     }
 
     void AdaBoost::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
     {
-        // AdaBoost handles its own weight management, so we just build the model
+        // Call buildModel which does the actual training
         buildModel(weights);
+        fitted = true;
     }
 
     std::unique_ptr<Classifier> AdaBoost::trainBaseEstimator(const torch::Tensor& weights)
     {
         // Create a decision tree with specified max depth
-        // For AdaBoost, we typically use shallow trees (stumps with max_depth=1)
         auto tree = std::make_unique<DecisionTree>(base_max_depth);
 
+        // Ensure weights are properly normalized
+        auto normalized_weights = weights / weights.sum();
+
         // Fit the tree with the current sample weights
-        tree->fit(dataset, features, className, states, weights, Smoothing_t::NONE);
+        tree->fit(dataset, features, className, states, normalized_weights, Smoothing_t::NONE);
 
         return tree;
     }
 
     double AdaBoost::calculateWeightedError(Classifier* estimator, const torch::Tensor& weights)
     {
-        // Get predictions from the estimator
+        // Get features and labels from dataset
         auto X = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), torch::indexing::Slice() });
         auto y_true = dataset.index({ -1, torch::indexing::Slice() });
-        auto y_pred = estimator->predict(X.t());
+
+        // Get predictions from the estimator
+        auto y_pred = estimator->predict(X);
 
         // Calculate weighted error
         auto incorrect = (y_pred != y_true).to(torch::kFloat);
-        double weighted_error = torch::sum(incorrect * weights).item<double>();
+
+        // Ensure weights are normalized
+        auto normalized_weights = weights / weights.sum();
+
+        // Calculate weighted error
+        double weighted_error = torch::sum(incorrect * normalized_weights).item<double>();
 
         return weighted_error;
     }
@@ -119,7 +151,7 @@ namespace bayesnet {
         // Get predictions from the estimator
         auto X = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), torch::indexing::Slice() });
         auto y_true = dataset.index({ -1, torch::indexing::Slice() });
-        auto y_pred = estimator->predict(X.t());
+        auto y_pred = estimator->predict(X);
 
         // Update weights according to SAMME algorithm
         // w_i = w_i * exp(alpha * I(y_i != y_pred_i))
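+        // Illustrative arithmetic (editor's note, numbers not from a run):
+        // with alpha ≈ 2.079 a misclassified sample has its weight multiplied
+        // by exp(2.079) ≈ 8, while correctly classified samples keep their
+        // weight until normalizeWeights() rescales everything to sum to 1.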
@@ -187,6 +219,16 @@ namespace bayesnet {
         return graph_lines;
     }
 
+    void AdaBoost::checkValues() const
+    {
+        if (n_estimators <= 0) {
+            throw std::invalid_argument("n_estimators must be positive");
+        }
+        if (base_max_depth <= 0) {
+            throw std::invalid_argument("base_max_depth must be positive");
+        }
+    }
+
     void AdaBoost::setHyperparameters(const nlohmann::json& hyperparameters_)
     {
         auto hyperparameters = hyperparameters_;
@@ -194,21 +236,209 @@ namespace bayesnet {
         auto it = hyperparameters.find("n_estimators");
         if (it != hyperparameters.end()) {
             n_estimators = it->get<int>();
-            if (n_estimators <= 0) {
-                throw std::invalid_argument("n_estimators must be positive");
-            }
-            hyperparameters.erase("n_estimators"); // Remove 'n_estimators' if present
+            hyperparameters.erase("n_estimators");
         }
 
         it = hyperparameters.find("base_max_depth");
         if (it != hyperparameters.end()) {
             base_max_depth = it->get<int>();
-            if (base_max_depth <= 0) {
-                throw std::invalid_argument("base_max_depth must be positive");
-            }
-            hyperparameters.erase("base_max_depth"); // Remove 'base_max_depth' if present
+            hyperparameters.erase("base_max_depth");
         }
+        checkValues();
         Ensemble::setHyperparameters(hyperparameters);
     }
 
+    torch::Tensor AdaBoost::predict(torch::Tensor& X)
+    {
+        if (!fitted) {
+            throw std::runtime_error(CLASSIFIER_NOT_FITTED);
+        }
+
+        if (models.empty()) {
+            throw std::runtime_error("No models have been trained");
+        }
+
+        // X should be (n_features, n_samples)
+        if (X.size(0) != n) {
+            throw std::runtime_error("Input has wrong number of features. Expected " +
+                std::to_string(n) + " but got " + std::to_string(X.size(0)));
+        }
+
+        int n_samples = X.size(1);
+        torch::Tensor predictions = torch::zeros({ n_samples }, torch::kInt32);
+
+        for (int i = 0; i < n_samples; i++) {
+            auto sample = X.index({ torch::indexing::Slice(), i });
+            predictions[i] = predictSample(sample);
+        }
+
+        return predictions;
+    }
+
+    torch::Tensor AdaBoost::predict_proba(torch::Tensor& X)
+    {
+        if (!fitted) {
+            throw std::runtime_error(CLASSIFIER_NOT_FITTED);
+        }
+
+        if (models.empty()) {
+            throw std::runtime_error("No models have been trained");
+        }
+
+        // X should be (n_features, n_samples)
+        if (X.size(0) != n) {
+            throw std::runtime_error("Input has wrong number of features. Expected " +
+                std::to_string(n) + " but got " + std::to_string(X.size(0)));
+        }
+
+        int n_samples = X.size(1);
+        torch::Tensor probabilities = torch::zeros({ n_samples, n_classes });
+
+        for (int i = 0; i < n_samples; i++) {
+            auto sample = X.index({ torch::indexing::Slice(), i });
+            probabilities[i] = predictProbaSample(sample);
+        }
+
+        return probabilities;
+    }
+
+    std::vector<int> AdaBoost::predict(std::vector<std::vector<int>>& X)
+    {
+        // Convert to tensor - X is samples x features, need to transpose
+        torch::Tensor X_tensor = platform::TensorUtils::to_matrix(X).t();
+        auto predictions = predict(X_tensor);
+        std::vector<int> result = platform::TensorUtils::to_vector<int>(predictions);
+        return result;
+    }
+
+    std::vector<std::vector<double>> AdaBoost::predict_proba(std::vector<std::vector<int>>& X)
+    {
+        auto n_samples = X.size();
+        // Convert to tensor - X is samples x features, need to transpose
+        torch::Tensor X_tensor = platform::TensorUtils::to_matrix(X).t();
+        auto proba_tensor = predict_proba(X_tensor);
+
+        std::vector<std::vector<double>> result(n_samples, std::vector<double>(n_classes, 0.0));
+
+        for (size_t i = 0; i < n_samples; i++) {
+            for (int j = 0; j < n_classes; j++) {
+                result[i][j] = proba_tensor[i][j].item<double>();
+            }
+        }
+
+        return result;
+    }
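+
+    // Illustrative arithmetic (editor's note, numbers not from a run): with
+    // three estimators whose alphas are { 0.9, 0.5, 0.4 } predicting classes
+    // { 1, 0, 1 } for a sample, the vote totals are class 0 = 0.5 and
+    // class 1 = 0.9 + 0.4 = 1.3, so predictSample returns class 1.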
Expected " + + std::to_string(n) + " but got " + std::to_string(x.size(0))); + } + + // Initialize class votes + std::vector class_votes(n_classes, 0.0); + + // Accumulate weighted votes from all estimators + for (size_t i = 0; i < models.size(); i++) { + if (alphas[i] <= 0) continue; // Skip estimators with zero or negative weight + + try { + // Create a matrix with the sample as a column vector + auto x_matrix = x.unsqueeze(1); // Shape: (n_features, 1) + + // Get prediction from this estimator + auto prediction = models[i]->predict(x_matrix); + int predicted_class = prediction[0].item(); + + // Add weighted vote for this class + if (predicted_class >= 0 && predicted_class < n_classes) { + class_votes[predicted_class] += alphas[i]; + } + } + catch (const std::exception& e) { + std::cerr << "Error in estimator " << i << ": " << e.what() << std::endl; + continue; + } + } + + // Return class with highest weighted vote + return std::distance(class_votes.begin(), + std::max_element(class_votes.begin(), class_votes.end())); + } + + torch::Tensor AdaBoost::predictProbaSample(const torch::Tensor& x) const + { + if (!fitted) { + throw std::runtime_error(CLASSIFIER_NOT_FITTED); + } + + if (models.empty()) { + throw std::runtime_error("No models have been trained"); + } + + // x should be a 1D tensor with n features + if (x.size(0) != n) { + throw std::runtime_error("Input sample has wrong number of features. Expected " + + std::to_string(n) + " but got " + std::to_string(x.size(0))); + } + + // Initialize probability accumulator + torch::Tensor class_probs = torch::zeros({ n_classes }, torch::kDouble); + + // Sum weighted probabilities from all estimators + double total_alpha = 0.0; + + for (size_t i = 0; i < models.size(); i++) { + if (alphas[i] <= 0) continue; // Skip estimators with zero or negative weight + + try { + // Create a matrix with the sample as a column vector + auto x_matrix = x.unsqueeze(1); // Shape: (n_features, 1) + + // Get probability predictions from this estimator + auto proba = models[i]->predict_proba(x_matrix); + + // Add weighted probabilities + for (int j = 0; j < n_classes; j++) { + class_probs[j] += alphas[i] * proba[0][j].item(); + } + + total_alpha += alphas[i]; + } + catch (const std::exception& e) { + std::cerr << "Error in estimator " << i << ": " << e.what() << std::endl; + continue; + } + } + + // Normalize probabilities + if (total_alpha > 0) { + class_probs = class_probs / total_alpha; + } else { + // If no valid estimators, return uniform distribution + class_probs.fill_(1.0 / n_classes); + } + + // Ensure probabilities are valid (non-negative and sum to 1) + class_probs = torch::clamp(class_probs, 0.0, 1.0); + double sum_probs = torch::sum(class_probs).item(); + if (sum_probs > 1e-15) { + class_probs = class_probs / sum_probs; + } else { + class_probs.fill_(1.0 / n_classes); + } + + return class_probs.to(torch::kFloat); // Convert back to float for consistency + } + } // namespace bayesnet \ No newline at end of file diff --git a/src/experimental_clfs/AdaBoost.h b/src/experimental_clfs/AdaBoost.h index c9e4ede..5d1bc37 100644 --- a/src/experimental_clfs/AdaBoost.h +++ b/src/experimental_clfs/AdaBoost.h @@ -21,9 +21,9 @@ namespace bayesnet { std::vector graph(const std::string& title = "") const override; // AdaBoost specific methods - void setNEstimators(int n_estimators) { this->n_estimators = n_estimators; } + void setNEstimators(int n_estimators) { this->n_estimators = n_estimators; checkValues(); } int getNEstimators() const { return n_estimators; } - 
+
+        // Normalize probabilities
+        if (total_alpha > 0) {
+            class_probs = class_probs / total_alpha;
+        } else {
+            // If no valid estimators, return uniform distribution
+            class_probs.fill_(1.0 / n_classes);
+        }
+
+        // Ensure probabilities are valid (non-negative and sum to 1)
+        class_probs = torch::clamp(class_probs, 0.0, 1.0);
+        double sum_probs = torch::sum(class_probs).item<double>();
+        if (sum_probs > 1e-15) {
+            class_probs = class_probs / sum_probs;
+        } else {
+            class_probs.fill_(1.0 / n_classes);
+        }
+
+        return class_probs.to(torch::kFloat); // Convert back to float for consistency
+    }
+
 } // namespace bayesnet
\ No newline at end of file
diff --git a/src/experimental_clfs/AdaBoost.h b/src/experimental_clfs/AdaBoost.h
index c9e4ede..5d1bc37 100644
--- a/src/experimental_clfs/AdaBoost.h
+++ b/src/experimental_clfs/AdaBoost.h
@@ -21,9 +21,9 @@ namespace bayesnet {
         std::vector<std::string> graph(const std::string& title = "") const override;
 
         // AdaBoost specific methods
-        void setNEstimators(int n_estimators) { this->n_estimators = n_estimators; }
+        void setNEstimators(int n_estimators) { this->n_estimators = n_estimators; checkValues(); }
         int getNEstimators() const { return n_estimators; }
-        void setBaseMaxDepth(int depth) { this->base_max_depth = depth; }
+        void setBaseMaxDepth(int depth) { this->base_max_depth = depth; checkValues(); }
         int getBaseMaxDepth() const { return base_max_depth; }
 
         // Get the weight of each base estimator
@@ -35,6 +35,11 @@ namespace bayesnet {
         // Override setHyperparameters from BaseClassifier
         void setHyperparameters(const nlohmann::json& hyperparameters) override;
 
+        torch::Tensor predict(torch::Tensor& X) override;
+        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
+        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X);
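+        // Editor's note: the tensor overloads expect X shaped
+        // (n_features, n_samples), matching the implementation in
+        // AdaBoost.cpp; the vector overloads convert and transpose first.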
+
     protected:
         void buildModel(const torch::Tensor& weights) override;
         void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
@@ -45,6 +50,8 @@ namespace bayesnet {
         std::vector<double> alphas;          // Weight of each base estimator
         std::vector<double> training_errors; // Training error at each iteration
        torch::Tensor sample_weights;         // Current sample weights
+        int n_classes;                       // Number of classes in the target variable
+        int n;                               // Number of features
 
         // Train a single base estimator
         std::unique_ptr<Classifier> trainBaseEstimator(const torch::Tensor& weights);
@@ -57,6 +64,15 @@ namespace bayesnet {
 
         // Normalize weights to sum to 1
         void normalizeWeights();
+
+        // Check if hyperparameters values are valid
+        void checkValues() const;
+
+        // Make predictions for a single sample
+        int predictSample(const torch::Tensor& x) const;
+
+        // Make probabilistic predictions for a single sample
+        torch::Tensor predictProbaSample(const torch::Tensor& x) const;
     };
 }
diff --git a/src/experimental_clfs/DecisionTree.cpp b/src/experimental_clfs/DecisionTree.cpp
index c615504..307186a 100644
--- a/src/experimental_clfs/DecisionTree.cpp
+++ b/src/experimental_clfs/DecisionTree.cpp
@@ -327,30 +327,6 @@ namespace bayesnet {
         return predictions;
     }
 
-    void dumpTensor(const torch::Tensor& tensor, const std::string& name)
-    {
-        std::cout << name << ": " << std::endl;
-        for (int i = 0; i < tensor.size(0); i++) {
-            std::cout << "[";
-            for (int j = 0; j < tensor.size(1); j++) {
-                std::cout << tensor[i][j].item<int>() << " ";
-            }
-            std::cout << "]" << std::endl;
-        }
-        std::cout << std::endl;
-    }
-    void dumpVector(const std::vector<std::vector<int>>& vec, const std::string& name)
-    {
-        std::cout << name << ": " << std::endl;
-        for (const auto& row : vec) {
-            std::cout << "[";
-            for (const auto& val : row) {
-                std::cout << val << " ";
-            }
-            std::cout << "] " << std::endl;
-        }
-        std::cout << std::endl;
-    }
 
     std::vector<int> DecisionTree::predict(std::vector<std::vector<int>>& X)
     {
diff --git a/src/experimental_clfs/DecisionTree.h b/src/experimental_clfs/DecisionTree.h
index 93ec930..8a1c337 100644
--- a/src/experimental_clfs/DecisionTree.h
+++ b/src/experimental_clfs/DecisionTree.h
@@ -30,6 +30,9 @@ namespace bayesnet {
         void setMaxDepth(int depth) { max_depth = depth; checkValues(); }
         void setMinSamplesSplit(int samples) { min_samples_split = samples; checkValues(); }
         void setMinSamplesLeaf(int samples) { min_samples_leaf = samples; checkValues(); }
+        int getMaxDepth() const { return max_depth; }
+        int getMinSamplesSplit() const { return min_samples_split; }
+        int getMinSamplesLeaf() const { return min_samples_leaf; }
 
         // Override setHyperparameters
         void setHyperparameters(const nlohmann::json& hyperparameters) override;
@@ -39,6 +42,12 @@ namespace bayesnet {
         torch::Tensor predict_proba(torch::Tensor& X) override;
         std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X);
 
+        // Make predictions for a single sample
+        int predictSample(const torch::Tensor& x) const;
+
+        // Make probabilistic predictions for a single sample
+        torch::Tensor predictProbaSample(const torch::Tensor& x) const;
+
     protected:
         void buildModel(const torch::Tensor& weights) override;
         void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override
@@ -88,11 +97,7 @@ namespace bayesnet {
             const torch::Tensor& sample_weights
         );
 
-        // Make predictions for a single sample
-        int predictSample(const torch::Tensor& x) const;
-        // Make probabilistic predictions for a single sample
-        torch::Tensor predictProbaSample(const torch::Tensor& x) const;
 
         // Traverse tree to find leaf node
         const TreeNode* traverseTree(const torch::Tensor& x, const TreeNode* node) const;
diff --git a/src/main/Models.h b/src/main/Models.h
index 69ceaea..3640a01 100644
--- a/src/main/Models.h
+++ b/src/main/Models.h
@@ -26,7 +26,7 @@
 #include
 #include
 #include
 #include "../experimental_clfs/XA1DE.h"
-// #include "../experimental_clfs/AdaBoost.h"
+#include "../experimental_clfs/AdaBoost.h"
 #include "../experimental_clfs/DecisionTree.h"
 namespace platform {
diff --git a/src/main/modelRegister.h b/src/main/modelRegister.h
index 4764c07..5f44728 100644
--- a/src/main/modelRegister.h
+++ b/src/main/modelRegister.h
@@ -37,8 +37,8 @@ namespace platform {
         [](void) -> bayesnet::BaseClassifier* { return new pywrap::XGBoost();});
     static Registrar registrarAdaPy("AdaBoostPy",
         [](void) -> bayesnet::BaseClassifier* { return new pywrap::AdaBoostPy();});
-    // static Registrar registrarAda("AdaBoost",
-    //     [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AdaBoost();});
+    static Registrar registrarAda("AdaBoost",
+        [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AdaBoost();});
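+    // Editor's note: this registers the native C++ AdaBoost under the name
+    // "AdaBoost", alongside the Python wrapper registered above as "AdaBoostPy".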
Construction", "[AdaBoost]") +{ + SECTION("Default constructor") + { + REQUIRE_NOTHROW(AdaBoost()); + } + + SECTION("Constructor with parameters") + { + REQUIRE_NOTHROW(AdaBoost(100, 2)); + } + + SECTION("Constructor parameter access") + { + AdaBoost ada(75, 3); + REQUIRE(ada.getNEstimators() == 75); + REQUIRE(ada.getBaseMaxDepth() == 3); + } +} + +TEST_CASE("AdaBoost Hyperparameter Setting", "[AdaBoost]") +{ + AdaBoost ada; + + SECTION("Set individual hyperparameters") + { + REQUIRE_NOTHROW(ada.setNEstimators(100)); + REQUIRE_NOTHROW(ada.setBaseMaxDepth(5)); + + REQUIRE(ada.getNEstimators() == 100); + REQUIRE(ada.getBaseMaxDepth() == 5); + } + + SECTION("Set hyperparameters via JSON") + { + nlohmann::json params; + params["n_estimators"] = 80; + params["base_max_depth"] = 4; + + REQUIRE_NOTHROW(ada.setHyperparameters(params)); + } + + SECTION("Invalid hyperparameters should throw") + { + nlohmann::json params; + + // Negative n_estimators + params["n_estimators"] = -1; + REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument); + + // Zero n_estimators + params["n_estimators"] = 0; + REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument); + + // Negative base_max_depth + params["n_estimators"] = 50; + params["base_max_depth"] = -1; + REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument); + + // Zero base_max_depth + params["base_max_depth"] = 0; + REQUIRE_THROWS_AS(ada.setHyperparameters(params), std::invalid_argument); + } +} + +TEST_CASE("AdaBoost Basic Functionality", "[AdaBoost]") +{ + // Create a simple dataset + int n_samples = 20; + int n_features = 2; + + std::vector> X(n_features, std::vector(n_samples)); + std::vector y(n_samples); + + // Simple pattern: class depends on first feature + for (int i = 0; i < n_samples; i++) { + X[0][i] = i < 10 ? 
+
+    SECTION("Training with vector interface")
+    {
+        AdaBoost ada(10, 3); // 10 estimators, max_depth = 3
+        REQUIRE_NOTHROW(ada.fit(X, y, features, className, states, Smoothing_t::NONE));
+
+        // Check that we have the expected number of models
+        auto weights = ada.getEstimatorWeights();
+        REQUIRE(weights.size() <= 10); // Should be <= n_estimators
+        REQUIRE(weights.size() > 0);   // Should have at least one model
+
+        // Check training errors
+        auto errors = ada.getTrainingErrors();
+        REQUIRE(errors.size() == weights.size());
+
+        // All training errors should be less than 0.5 for this simple dataset
+        for (double error : errors) {
+            REQUIRE(error < 0.5);
+            REQUIRE(error >= 0.0);
+        }
+    }
+
+    SECTION("Prediction before fitting")
+    {
+        AdaBoost ada;
+        REQUIRE_THROWS_WITH(ada.predict(X),
+            ContainsSubstring("not been fitted"));
+        REQUIRE_THROWS_WITH(ada.predict_proba(X),
+            ContainsSubstring("not been fitted"));
+    }
+
+    SECTION("Prediction with vector interface")
+    {
+        AdaBoost ada(10, 3);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(X);
+        REQUIRE(predictions.size() == static_cast<size_t>(n_samples));
+
+    }
+
+    SECTION("Probability predictions with vector interface")
+    {
+        AdaBoost ada(10, 3);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+
+        auto proba = ada.predict_proba(X);
+        REQUIRE(proba.size() == static_cast<size_t>(n_samples));
+        REQUIRE(proba[0].size() == 2); // Two classes
+
+        // Check probabilities sum to 1 and are valid
+        auto predictions = ada.predict(X);
+        for (size_t i = 0; i < proba.size(); i++) {
+            auto p = proba[i];
+            auto pred = predictions[i];
+            REQUIRE(p.size() == 2);
+            REQUIRE(p[0] >= 0.0);
+            REQUIRE(p[1] >= 0.0);
+            double sum = p[0] + p[1];
+            REQUIRE(sum == Catch::Approx(1.0).epsilon(1e-6));
+
+            // Check that predict_proba matches the expected predict value
+            REQUIRE(pred == (p[0] > p[1] ? 0 : 1));
+        }
+    }
+}
+
+TEST_CASE("AdaBoost Tensor Interface", "[AdaBoost]")
+{
+    auto raw = RawDatasets("iris", true);
+
+    SECTION("Training with tensor format")
+    {
+        AdaBoost ada(20, 3);
+
+        INFO("Dataset shape: " << raw.dataset.sizes());
+        INFO("Features: " << raw.featurest.size());
+        INFO("Samples: " << raw.nSamples);
+
+        // AdaBoost expects dataset in format: features x samples, with labels as last row
+        REQUIRE_NOTHROW(ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE));
+
+        // Test prediction with tensor
+        auto predictions = ada.predict(raw.Xt);
+        REQUIRE(predictions.size(0) == raw.yt.size(0));
+
+        // Calculate accuracy
+        auto correct = torch::sum(predictions == raw.yt).item<int>();
+        double accuracy = static_cast<double>(correct) / raw.yt.size(0);
+        REQUIRE(accuracy > 0.85); // Should achieve good accuracy on Iris
+
+        // Test probability predictions with tensor
+        auto proba = ada.predict_proba(raw.Xt);
+        REQUIRE(proba.size(0) == raw.yt.size(0));
+        REQUIRE(proba.size(1) == 3); // Three classes in Iris
+
+        // Check probabilities sum to 1
+        auto prob_sums = torch::sum(proba, 1);
+        for (int i = 0; i < prob_sums.size(0); i++) {
+            REQUIRE(prob_sums[i].item<float>() == Catch::Approx(1.0).epsilon(1e-6));
+        }
+    }
+}
+
+TEST_CASE("AdaBoost on Iris Dataset", "[AdaBoost][iris]")
+{
+    auto raw = RawDatasets("iris", true);
+
+    SECTION("Training with vector interface")
+    {
+        AdaBoost ada(30, 3);
+
+        REQUIRE_NOTHROW(ada.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv, Smoothing_t::NONE));
+
+        auto predictions = ada.predict(raw.Xv);
+        REQUIRE(predictions.size() == raw.yv.size());
+
+        // Calculate accuracy
+        int correct = 0;
+        for (size_t i = 0; i < predictions.size(); i++) {
+            if (predictions[i] == raw.yv[i]) correct++;
+        }
+        double accuracy = static_cast<double>(correct) / raw.yv.size();
+        REQUIRE(accuracy > 0.85); // Should achieve good accuracy
+
+        // Test probability predictions
+        auto proba = ada.predict_proba(raw.Xv);
+        REQUIRE(proba.size() == raw.yv.size());
+        REQUIRE(proba[0].size() == 3); // Three classes
+
+        // Verify estimator weights and errors
+        auto weights = ada.getEstimatorWeights();
+        auto errors = ada.getTrainingErrors();
+
+        REQUIRE(weights.size() == errors.size());
+        REQUIRE(weights.size() > 0);
+
+        // All weights should be positive (for non-zero error estimators)
+        for (double w : weights) {
+            REQUIRE(w >= 0.0);
+        }
+
+        // All errors should be less than 0.5 (better than random)
+        for (double e : errors) {
+            REQUIRE(e < 0.5);
+            REQUIRE(e >= 0.0);
+        }
+    }
+
+    SECTION("Different number of estimators")
+    {
+        std::vector<int> n_estimators = { 5, 15, 25 };
+
+        for (int n_est : n_estimators) {
+            AdaBoost ada(n_est, 2);
+            ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
+
+            auto predictions = ada.predict(raw.Xt);
+            REQUIRE(predictions.size(0) == raw.yt.size(0));
+
+            // Check that we don't exceed the specified number of estimators
+            auto weights = ada.getEstimatorWeights();
+            REQUIRE(static_cast<int>(weights.size()) <= n_est);
+        }
+    }
+
+    SECTION("Different base estimator depths")
+    {
+        std::vector<int> depths = { 1, 2, 4 };
+
+        for (int depth : depths) {
+            AdaBoost ada(15, depth);
+            ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
+
+            auto predictions = ada.predict(raw.Xt);
+            REQUIRE(predictions.size(0) == raw.yt.size(0));
+        }
+    }
+}
+
+TEST_CASE("AdaBoost Edge Cases", "[AdaBoost]")
+{
+    auto raw = RawDatasets("iris", true);
+
+    SECTION("Single estimator (depth 1 stump)")
+    {
+        AdaBoost ada(1, 1); // Single decision stump
+        ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(raw.Xt);
+        REQUIRE(predictions.size(0) == raw.yt.size(0));
+
+        auto weights = ada.getEstimatorWeights();
+        REQUIRE(weights.size() == 1);
+    }
+
+    SECTION("Perfect classifier scenario")
+    {
+        // Create a perfectly separable dataset
+        std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
+        std::vector<int> y = { 0, 0, 1, 1 };
+        std::vector<std::string> features = { "f1", "f2" };
+        std::string className = "class";
+        std::map<std::string, std::vector<int>> states;
+        states["f1"] = { 0, 1 };
+        states["f2"] = { 0, 1 };
+        states["class"] = { 0, 1 };
+
+        AdaBoost ada(10, 3);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(X);
+        REQUIRE(predictions.size() == 4);
+
+        // Should achieve perfect accuracy
+        int correct = 0;
+        for (size_t i = 0; i < predictions.size(); i++) {
+            if (predictions[i] == y[i]) correct++;
+        }
+        REQUIRE(correct == 4);
+
+        // Should stop early due to perfect classification
+        auto errors = ada.getTrainingErrors();
+        if (errors.size() > 0) {
+            REQUIRE(errors.back() < 1e-10); // Very low error
+        }
+    }
+
+    SECTION("Small dataset")
+    {
+        // Very small dataset
+        std::vector<std::vector<int>> X = { {0,1}, {1,0} };
+        std::vector<int> y = { 0, 1 };
+        std::vector<std::string> features = { "f1", "f2" };
+        std::string className = "class";
+        std::map<std::string, std::vector<int>> states;
+        states["f1"] = { 0, 1 };
+        states["f2"] = { 0, 1 };
+        states["class"] = { 0, 1 };
+
+        AdaBoost ada(5, 1);
+        REQUIRE_NOTHROW(ada.fit(X, y, features, className, states, Smoothing_t::NONE));
+
+        auto predictions = ada.predict(X);
+        REQUIRE(predictions.size() == 2);
+    }
+}
+
+TEST_CASE("AdaBoost Graph Visualization", "[AdaBoost]")
+{
+    // Simple dataset for visualization
+    std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
+    std::vector<int> y = { 0, 1, 1, 0 }; // XOR pattern
+    std::vector<std::string> features = { "x1", "x2" };
+    std::string className = "xor";
+    std::map<std::string, std::vector<int>> states;
+    states["x1"] = { 0, 1 };
+    states["x2"] = { 0, 1 };
+    states["xor"] = { 0, 1 };
+
+    SECTION("Graph generation")
+    {
+        AdaBoost ada(5, 2);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+
+        auto graph_lines = ada.graph();
+
+        REQUIRE(graph_lines.size() > 2);
+        REQUIRE(graph_lines.front() == "digraph AdaBoost {");
+        REQUIRE(graph_lines.back() == "}");
+
+        // Should contain base estimator references
+        bool has_estimators = false;
+        for (const auto& line : graph_lines) {
+            if (line.find("Estimator") != std::string::npos) {
+                has_estimators = true;
+                break;
+            }
+        }
+        REQUIRE(has_estimators);
+
+        // Should contain alpha values
+        bool has_alpha = false;
+        for (const auto& line : graph_lines) {
+            if (line.find("α") != std::string::npos || line.find("alpha") != std::string::npos) {
+                has_alpha = true;
+                break;
+            }
+        }
+        REQUIRE(has_alpha);
+    }
+
+    SECTION("Graph with title")
+    {
+        AdaBoost ada(3, 1);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+
+        auto graph_lines = ada.graph("XOR AdaBoost");
+
+        bool has_title = false;
+        for (const auto& line : graph_lines) {
+            if (line.find("label=\"XOR AdaBoost\"") != std::string::npos) {
+                has_title = true;
+                break;
+            }
+        }
+        REQUIRE(has_title);
+    }
+}
+
+TEST_CASE("AdaBoost with Weights", "[AdaBoost]")
+{
+    auto raw = RawDatasets("iris", true);
+
+    SECTION("Uniform weights")
+    {
+        AdaBoost ada(20, 3);
+        ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, raw.weights, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(raw.Xt);
+        REQUIRE(predictions.size(0) == raw.yt.size(0));
+
+        auto weights = ada.getEstimatorWeights();
+        REQUIRE(weights.size() > 0);
+    }
+
+    SECTION("Non-uniform weights")
+    {
+        auto weights = torch::ones({ raw.nSamples });
+        weights.index({ torch::indexing::Slice(0, 50) }) *= 3.0; // Emphasize first class
+        weights = weights / weights.sum();
+
+        AdaBoost ada(15, 2);
+        ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, weights, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(raw.Xt);
+        REQUIRE(predictions.size(0) == raw.yt.size(0));
+
+        // Check that training completed successfully
+        auto estimator_weights = ada.getEstimatorWeights();
+        auto errors = ada.getTrainingErrors();
+
+        REQUIRE(estimator_weights.size() == errors.size());
+        REQUIRE(estimator_weights.size() > 0);
+    }
+}
+
+TEST_CASE("AdaBoost Input Dimension Validation", "[AdaBoost]")
+{
+    auto raw = RawDatasets("iris", true);
+
+    SECTION("Correct input dimensions")
+    {
+        AdaBoost ada(10, 2);
+        ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
+
+        // Test with correct tensor dimensions (features x samples)
+        REQUIRE_NOTHROW(ada.predict(raw.Xt));
+        REQUIRE_NOTHROW(ada.predict_proba(raw.Xt));
+
+        // Test with correct vector dimensions (features x samples)
+        REQUIRE_NOTHROW(ada.predict(raw.Xv));
+        REQUIRE_NOTHROW(ada.predict_proba(raw.Xv));
+    }
+
+    SECTION("Dimension consistency between interfaces")
+    {
+        AdaBoost ada(10, 2);
+        ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
+
+        // Get predictions from both interfaces
+        auto tensor_predictions = ada.predict(raw.Xt);
+        auto vector_predictions = ada.predict(raw.Xv);
+
+        // Should have same number of predictions
+        REQUIRE(tensor_predictions.size(0) == static_cast<int64_t>(vector_predictions.size()));
+
+        // Test probability predictions
+        auto tensor_proba = ada.predict_proba(raw.Xt);
+        auto vector_proba = ada.predict_proba(raw.Xv);
+
+        REQUIRE(tensor_proba.size(0) == static_cast<int64_t>(vector_proba.size()));
+        REQUIRE(tensor_proba.size(1) == static_cast<int64_t>(vector_proba[0].size()));
+
+        // Verify predictions match between interfaces
+        for (int i = 0; i < tensor_predictions.size(0); i++) {
+            REQUIRE(tensor_predictions[i].item<int>() == vector_predictions[i]);
+
+            // Verify probabilities match between interfaces
+            for (int j = 0; j < tensor_proba.size(1); j++) {
+                REQUIRE(tensor_proba[i][j].item<double>() == Catch::Approx(vector_proba[i][j]).epsilon(1e-10));
+            }
+        }
+    }
+}
+
+TEST_CASE("AdaBoost Debug - Simple Dataset Analysis", "[AdaBoost][debug]")
+{
+    // Create the exact same simple dataset that was failing
+    int n_samples = 20;
+    int n_features = 2;
+
+    std::vector<std::vector<int>> X(n_features, std::vector<int>(n_samples));
+    std::vector<int> y(n_samples);
+
+    // Simple pattern: class depends on first feature
+    for (int i = 0; i < n_samples; i++) {
+        X[0][i] = i < 10 ? 0 : 1;
+        X[1][i] = i % 2;
+        y[i] = X[0][i]; // Class equals first feature
+    }
+
+    std::vector<std::string> features = { "f1", "f2" };
+    std::string className = "class";
+    std::map<std::string, std::vector<int>> states;
+    states["f1"] = { 0, 1 };
+    states["f2"] = { 0, 1 };
+    states["class"] = { 0, 1 };
+
+    SECTION("Debug training process")
+    {
+        AdaBoost ada(5, 3); // Few estimators for debugging
+
+        // This should work perfectly on this simple dataset
+        REQUIRE_NOTHROW(ada.fit(X, y, features, className, states, Smoothing_t::NONE));
+
+        // Get training details
+        auto weights = ada.getEstimatorWeights();
+        auto errors = ada.getTrainingErrors();
+
+        INFO("Number of models trained: " << weights.size());
+        INFO("Training errors: ");
+        for (size_t i = 0; i < errors.size(); i++) {
+            INFO("  Model " << i << ": error=" << errors[i] << ", weight=" << weights[i]);
+        }
+
+        // Should have at least one model
+        REQUIRE(weights.size() > 0);
+        REQUIRE(errors.size() == weights.size());
+
+        // All training errors should be reasonable for this simple dataset
+        for (double error : errors) {
+            REQUIRE(error >= 0.0);
+            REQUIRE(error < 0.5); // Should be better than random
+        }
+
+        // Test predictions
+        auto predictions = ada.predict(X);
+        REQUIRE(predictions.size() == static_cast<size_t>(n_samples));
+
+        // Calculate accuracy
+        int correct = 0;
+        for (size_t i = 0; i < predictions.size(); i++) {
+            if (predictions[i] == y[i]) correct++;
+            INFO("Sample " << i << ": predicted=" << predictions[i] << ", actual=" << y[i]);
+        }
+        double accuracy = static_cast<double>(correct) / n_samples;
+        INFO("Accuracy: " << accuracy);
+
+        // Should achieve high accuracy on this perfectly separable dataset
+        REQUIRE(accuracy >= 0.9); // Lower threshold for debugging
+
+        // Test probability predictions
+        auto proba = ada.predict_proba(X);
+        REQUIRE(proba.size() == static_cast<size_t>(n_samples));
+
+        // Verify probabilities are valid
+        for (size_t i = 0; i < proba.size(); i++) {
+            auto p = proba[i];
+            REQUIRE(p.size() == 2);
+            REQUIRE(p[0] >= 0.0);
+            REQUIRE(p[1] >= 0.0);
+            double sum = p[0] + p[1];
+            REQUIRE(sum == Catch::Approx(1.0).epsilon(1e-6));
+
+            // Predicted class should match highest probability
+            int pred_class = predictions[i];
+            REQUIRE(pred_class == (p[0] > p[1] ? 0 : 1));
+        }
+    }
+
+    SECTION("Compare with single DecisionTree")
+    {
+        // Test that AdaBoost performs at least as well as a single tree
+        DecisionTree single_tree(3, 2, 1);
+        single_tree.fit(X, y, features, className, states, Smoothing_t::NONE);
+        auto tree_predictions = single_tree.predict(X);
+
+        int tree_correct = 0;
+        for (size_t i = 0; i < tree_predictions.size(); i++) {
+            if (tree_predictions[i] == y[i]) tree_correct++;
+        }
+        double tree_accuracy = static_cast<double>(tree_correct) / n_samples;
+
+        AdaBoost ada(5, 3);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+        auto ada_predictions = ada.predict(X);
+
+        int ada_correct = 0;
+        for (size_t i = 0; i < ada_predictions.size(); i++) {
+            if (ada_predictions[i] == y[i]) ada_correct++;
+        }
+        double ada_accuracy = static_cast<double>(ada_correct) / n_samples;
+
+        INFO("DecisionTree accuracy: " << tree_accuracy);
+        INFO("AdaBoost accuracy: " << ada_accuracy);
+
+        // AdaBoost should perform at least as well as single tree
+        // (allowing small tolerance for numerical differences)
+        REQUIRE(ada_accuracy >= tree_accuracy - 0.1);
+    }
+}
+
+TEST_CASE("AdaBoost SAMME Algorithm Validation", "[AdaBoost]")
+{
+    auto raw = RawDatasets("iris", true);
+
+    SECTION("Prediction consistency with probabilities")
+    {
+        AdaBoost ada(15, 3);
+        ada.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(raw.Xt);
+        auto probabilities = ada.predict_proba(raw.Xt);
+
+        REQUIRE(predictions.size(0) == probabilities.size(0));
+        REQUIRE(probabilities.size(1) == 3); // Three classes in Iris
+
+        // For each sample, predicted class should correspond to highest probability
+        for (int i = 0; i < predictions.size(0); i++) {
+            int predicted_class = predictions[i].item<int>();
+            auto probs = probabilities[i];
+
+            // Find class with highest probability
+            auto max_prob_idx = torch::argmax(probs).item<int>();
+
+            // Predicted class should match class with highest probability
+            REQUIRE(predicted_class == max_prob_idx);
+
+            // Probabilities should sum to 1
+            double sum_probs = torch::sum(probs).item<double>();
+            REQUIRE(sum_probs == Catch::Approx(1.0).epsilon(1e-6));
+
+            // All probabilities should be non-negative
+            for (int j = 0; j < 3; j++) {
+                REQUIRE(probs[j].item<double>() >= 0.0);
+                REQUIRE(probs[j].item<double>() <= 1.0);
+            }
+        }
+    }
+
+    SECTION("Weighted voting verification")
+    {
+        // Simple dataset where we can verify the weighted voting
+        std::vector<std::vector<int>> X = { {0,0,1,1}, {0,1,0,1} };
+        std::vector<int> y = { 0, 1, 1, 0 };
+        std::vector<std::string> features = { "f1", "f2" };
+        std::string className = "class";
+        std::map<std::string, std::vector<int>> states;
+        states["f1"] = { 0, 1 };
+        states["f2"] = { 0, 1 };
+        states["class"] = { 0, 1 };
+
+        AdaBoost ada(5, 2);
+        ada.fit(X, y, features, className, states, Smoothing_t::NONE);
+
+        auto predictions = ada.predict(X);
+        auto probabilities = ada.predict_proba(X);
+        auto alphas = ada.getEstimatorWeights();
+
+        REQUIRE(predictions.size() == 4);
+        REQUIRE(probabilities.size() == 4);
+        REQUIRE(probabilities[0].size() == 2); // Two classes
+        REQUIRE(alphas.size() > 0);
+
+        // Verify that estimator weights are reasonable
+        for (double alpha : alphas) {
+            REQUIRE(alpha >= 0.0); // Alphas should be non-negative
+        }
+
+        // Verify prediction-probability consistency
+        for (size_t i = 0; i < predictions.size(); i++) {
+            int pred = predictions[i];
+            auto probs = probabilities[i];
+            REQUIRE(pred == (probs[0] > probs[1] ? 0 : 1));
+            REQUIRE(probs[0] + probs[1] == Catch::Approx(1.0).epsilon(1e-6));
+        }
+    }
+
+    SECTION("Empty models edge case")
+    {
+        AdaBoost ada(1, 1);
+
+        // Try to predict before fitting
+        std::vector<std::vector<int>> X = { {0}, {1} };
+        REQUIRE_THROWS_WITH(ada.predict(X), ContainsSubstring("not been fitted"));
+        REQUIRE_THROWS_WITH(ada.predict_proba(X), ContainsSubstring("not been fitted"));
+    }
+}
\ No newline at end of file
diff --git a/tests/TestDecisionTree.cpp b/tests/TestDecisionTree.cpp
index 8fd4bd5..7b5ef76 100644
--- a/tests/TestDecisionTree.cpp
+++ b/tests/TestDecisionTree.cpp
@@ -39,6 +39,9 @@ TEST_CASE("DecisionTree Hyperparameter Setting", "[DecisionTree]")
         REQUIRE_NOTHROW(dt.setMaxDepth(10));
         REQUIRE_NOTHROW(dt.setMinSamplesSplit(5));
         REQUIRE_NOTHROW(dt.setMinSamplesLeaf(2));
+        REQUIRE(dt.getMaxDepth() == 10);
+        REQUIRE(dt.getMinSamplesSplit() == 5);
+        REQUIRE(dt.getMinSamplesLeaf() == 2);
     }
 
     SECTION("Set hyperparameters via JSON")
@@ -49,6 +52,9 @@ TEST_CASE("DecisionTree Hyperparameter Setting", "[DecisionTree]")
         params["min_samples_leaf"] = 2;
 
         REQUIRE_NOTHROW(dt.setHyperparameters(params));
+        REQUIRE(dt.getMaxDepth() == 7);
+        REQUIRE(dt.getMinSamplesSplit() == 4);
+        REQUIRE(dt.getMinSamplesLeaf() == 2);
     }
 
     SECTION("Invalid hyperparameters should throw")
@@ -164,7 +170,9 @@ TEST_CASE("DecisionTree on Iris Dataset", "[DecisionTree][iris]")
         // Calculate accuracy
         auto correct = torch::sum(predictions == raw.yt).item<int>();
         double accuracy = static_cast<double>(correct) / raw.yt.size(0);
+        double accuracy_computed = dt.score(raw.Xt, raw.yt);
         REQUIRE(accuracy > 0.97); // Reasonable accuracy for Iris
+        REQUIRE(accuracy_computed == Catch::Approx(accuracy).epsilon(1e-6));
     }
 
     SECTION("Training with vector interface")