Complete refactor of XA1DE & XBAODE with new ExpClf class
src/experimental_clfs/ExpClf.cpp (new file, 171 lines)
@@ -0,0 +1,171 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include "ExpClf.h"
#include "TensorUtils.hpp"

namespace platform {
    ExpClf::ExpClf() : semaphore_{ CountingSemaphore::getInstance() }
    {
    }
    void ExpClf::setHyperparameters(const nlohmann::json& hyperparameters)
    {
        if (!hyperparameters.empty()) {
            throw std::invalid_argument("Invalid hyperparameters " + hyperparameters.dump());
        }
    }
    //
    // Predict
    //
    std::vector<int> ExpClf::predict_spode(std::vector<std::vector<int>>& test_data, int parent)
    {
        int test_size = test_data[0].size();
        int sample_size = test_data.size();
        auto predictions = std::vector<int>(test_size);

        // Split the test set into chunks and score them in parallel; the counting
        // semaphore caps the number of worker threads alive at any moment.
        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
        std::vector<std::thread> threads;
        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<int>& predictions) {
            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
            pthread_setname_np(pthread_self(), threadName.c_str());
#else
            pthread_setname_np(threadName.c_str());
#endif
            std::vector<int> instance(sample_size);
            for (int sample = begin; sample < begin + chunk; ++sample) {
                for (int feature = 0; feature < sample_size; ++feature) {
                    instance[feature] = samples[feature][sample];
                }
                predictions[sample] = aode_.predict_spode(instance, parent);
            }
            semaphore_.release();
        };
        for (int begin = 0; begin < test_size; begin += chunk_size) {
            int chunk = std::min(chunk_size, test_size - begin);
            semaphore_.acquire();
            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(predictions));
        }
        for (auto& thread : threads) {
            thread.join();
        }
        return predictions;
    }
    torch::Tensor ExpClf::predict(torch::Tensor& X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        torch::Tensor y = torch::tensor(predict(X_));
        return y;
    }
    torch::Tensor ExpClf::predict_proba(torch::Tensor& X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto probabilities = predict_proba(X_);
        auto n_samples = X.size(1);
        int n_classes = probabilities[0].size();
        auto y = torch::zeros({ n_samples, n_classes });
        for (int i = 0; i < n_samples; i++) {
            for (int j = 0; j < n_classes; j++) {
                y[i][j] = probabilities[i][j];
            }
        }
        return y;
    }
    float ExpClf::score(torch::Tensor& X, torch::Tensor& y)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto y_ = TensorUtils::to_vector<int>(y);
        return score(X_, y_);
    }
    std::vector<std::vector<double>> ExpClf::predict_proba(std::vector<std::vector<int>>& test_data)
    {
        int test_size = test_data[0].size();
        int sample_size = test_data.size();
        auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(aode_.statesClass()));

        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
        std::vector<std::thread> threads;
        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<std::vector<double>>& predictions) {
            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
            pthread_setname_np(pthread_self(), threadName.c_str());
#else
            pthread_setname_np(threadName.c_str());
#endif

            std::vector<int> instance(sample_size);
            for (int sample = begin; sample < begin + chunk; ++sample) {
                for (int feature = 0; feature < sample_size; ++feature) {
                    instance[feature] = samples[feature][sample];
                }
                predictions[sample] = aode_.predict_proba(instance);
            }
            semaphore_.release();
        };
        for (int begin = 0; begin < test_size; begin += chunk_size) {
            int chunk = std::min(chunk_size, test_size - begin);
            semaphore_.acquire();
            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
        }
        for (auto& thread : threads) {
            thread.join();
        }
        return probabilities;
    }
    std::vector<int> ExpClf::predict(std::vector<std::vector<int>>& test_data)
    {
        if (!fitted) {
            throw std::logic_error(CLASSIFIER_NOT_FITTED);
        }
        auto probabilities = predict_proba(test_data);
        std::vector<int> predictions(probabilities.size(), 0);

        // The class with the highest posterior probability wins.
        for (size_t i = 0; i < probabilities.size(); i++) {
            predictions[i] = std::distance(probabilities[i].begin(), std::max_element(probabilities[i].begin(), probabilities[i].end()));
        }

        return predictions;
    }
    float ExpClf::score(std::vector<std::vector<int>>& test_data, std::vector<int>& labels)
    {
        Timer timer;
        timer.start();
        std::vector<int> predictions = predict(test_data);
        int correct = 0;

        for (size_t i = 0; i < predictions.size(); i++) {
            if (predictions[i] == labels[i]) {
                correct++;
            }
        }
        if (debug) {
            std::cout << "* Time to predict: " << timer.getDurationString() << std::endl;
        }
        return static_cast<float>(correct) / predictions.size();
    }

    //
    // statistics
    //
    int ExpClf::getNumberOfNodes() const
    {
        return aode_.getNumberOfNodes();
    }
    int ExpClf::getNumberOfEdges() const
    {
        return aode_.getNumberOfEdges();
    }
    int ExpClf::getNumberOfStates() const
    {
        return aode_.getNumberOfStates();
    }
    int ExpClf::getClassNumStates() const
    {
        return aode_.statesClass();
    }

}
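Aside: the two threaded methods above share one throttling pattern, worth seeing in isolation. Below is a self-contained sketch of it, assuming C++20; std::counting_semaphore stands in for platform::CountingSemaphore, whose implementation is not part of this commit, and all names are illustrative.

#include <algorithm>
#include <semaphore>
#include <thread>
#include <vector>

int main() {
    std::vector<int> results(1000);
    const int max_workers = 4;  // analogue of semaphore_.getMaxCount()
    std::counting_semaphore<> slots(max_workers);
    const int test_size = static_cast<int>(results.size());
    // Same chunking rule as above: at most 150 samples per worker.
    const int chunk_size = std::min(150, test_size / max_workers + 1);

    std::vector<std::thread> threads;
    for (int begin = 0; begin < test_size; begin += chunk_size) {
        const int chunk = std::min(chunk_size, test_size - begin);
        slots.acquire();  // blocks while max_workers chunks are already in flight
        threads.emplace_back([&results, &slots, begin, chunk] {
            for (int i = begin; i < begin + chunk; ++i)
                results[i] = 2 * i;  // stand-in for aode_.predict_spode(...)
            slots.release();         // hand the slot to the next queued chunk
        });
    }
    for (auto& t : threads) t.join();
    return 0;
}

The semaphore is acquired by the spawning loop and released by the worker, so at most max_workers threads compute at once while the main thread keeps queuing chunks.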
src/experimental_clfs/ExpClf.h (new file, 74 lines)
@@ -0,0 +1,74 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef EXPCLF_H
#define EXPCLF_H
#include <iostream>
#include <vector>
#include <string>
#include <cmath>
#include <algorithm>
#include <limits>
#include "bayesnet/BaseClassifier.h"
#include "common/Timer.hpp"
#include "CountingSemaphore.hpp"
#include "Xaode.hpp"

namespace platform {

    class ExpClf : public bayesnet::BaseClassifier {
    public:
        ExpClf();
        virtual ~ExpClf() = default;
        // The fit overloads are no-ops here; derived classifiers wire them to the Xaode engine.
        ExpClf& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) { return *this; }
        // X is nxm tensor, y is nx1 tensor
        ExpClf& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) { return *this; }
        ExpClf& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) { return *this; }
        ExpClf& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) { return *this; }
        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
        torch::Tensor predict(torch::Tensor& X) override;
        torch::Tensor predict_proba(torch::Tensor& X) override;
        std::vector<int> predict_spode(std::vector<std::vector<int>>& test_data, int parent);
        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
        float score(torch::Tensor& X, torch::Tensor& y) override;
        int getNumberOfNodes() const override;
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        int getClassNumStates() const override;
        std::vector<std::string> show() const override { return {}; }
        std::vector<std::string> topological_order() override { return {}; }
        std::string dump_cpt() const override { return ""; }
        void setDebug(bool debug) { this->debug = debug; }
        bayesnet::status_t getStatus() const override { return status; }
        std::vector<std::string> getNotes() const override { return notes; }
        std::vector<std::string> graph(const std::string& title = "") const override { return {}; }
        void setHyperparameters(const nlohmann::json& hyperparameters) override;
        void set_active_parents(std::vector<int> active_parents) { for (const auto& parent : active_parents) aode_.add_active_parent(parent); }
        void add_active_parent(int parent) { aode_.add_active_parent(parent); }
        void remove_last_parent() { aode_.remove_last_parent(); }
    protected:
        bool debug = false;
        Xaode aode_;
        torch::Tensor weights_;
        bool fitted = false;
        const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
        // Rescale weights_ so that the weights sum to the number of instances;
        // an all-zero (e.g. empty) weight vector falls back to uniform 1.0 weights.
        inline void normalize_weights(int num_instances)
        {
            double sum = weights_.sum().item<double>();
            if (sum == 0) {
                weights_ = torch::full({ num_instances }, 1.0);
            } else {
                for (int i = 0; i < weights_.size(0); ++i) {
                    weights_[i] = weights_[i].item<double>() * num_instances / sum;
                }
            }
        }
    private:
        CountingSemaphore& semaphore_;
    };
}
#endif // EXPCLF_H
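The class above concentrates all shared behavior, so a derived classifier only has to bind fit() to the Xaode engine. Here is a hypothetical minimal subclass (XToyDE and its body are illustrative, modeled directly on XA1DE::fit as refactored below):

#include "ExpClf.h"

namespace platform {
    class XToyDE : public ExpClf {
    public:
        XToyDE& fit(std::vector<std::vector<int>>& X, std::vector<int>& y,
                    const std::vector<std::string>& features, const std::string& className,
                    std::map<std::string, std::vector<int>>& states,
                    const bayesnet::Smoothing_t smoothing)
        {
            normalize_weights(static_cast<int>(y.size()));  // empty weights_ -> uniform 1.0
            aode_.fit(X, y, features, className, states, weights_, true);
            fitted = true;  // predict() refuses to run until this is set
            return *this;
        }
    };
}

Everything else (both predict overloads, predict_proba, score, the thread pool, and the statistics getters) is inherited unchanged.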
src/experimental_clfs/XA1DE.cpp
@@ -8,190 +8,20 @@
 #include "TensorUtils.hpp"
 
 namespace platform {
-    XA1DE::XA1DE() : semaphore_{ CountingSemaphore::getInstance() }
-    {
-        validHyperparameters = { "use_threads" };
-    }
-    void XA1DE::setHyperparameters(const nlohmann::json& hyperparameters_)
-    {
-        auto hyperparameters = hyperparameters_;
-        if (hyperparameters.contains("use_threads")) {
-            use_threads = hyperparameters["use_threads"].get<bool>();
-            hyperparameters.erase("use_threads");
-        }
-        if (!hyperparameters.empty()) {
-            throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
-        }
-    }
     XA1DE& XA1DE::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing)
     {
-        Timer timer, timert;
-        timer.start();
-        timert.start();
-        // debug = true;
         std::vector<std::vector<int>> instances = X;
         instances.push_back(y);
         int num_instances = instances[0].size();
         int num_attributes = instances.size();
 
         normalize_weights(num_instances);
-        std::vector<int> statesv;
-        for (int i = 0; i < num_attributes; i++) {
-            statesv.push_back(*max_element(instances[i].begin(), instances[i].end()) + 1);
-        }
-        // std::cout << "* States: " << statesv << std::endl;
-        // std::cout << "* Weights: " << weights_ << std::endl;
-        // std::cout << "* Instances: " << num_instances << std::endl;
-        // std::cout << "* Attributes: " << num_attributes << std::endl;
-        // std::cout << "* y: " << y << std::endl;
-        // std::cout << "* x shape: " << X.size() << "x" << X[0].size() << std::endl;
-        // for (int i = 0; i < num_attributes - 1; i++) {
-        //     std::cout << "* " << features[i] << ": " << instances[i] << std::endl;
-        // }
-        // std::cout << "Starting to build the model" << std::endl;
-        aode_.init(statesv);
-        aode_.duration_first += timer.getDuration(); timer.start();
-        std::vector<int> instance;
-        for (int n_instance = 0; n_instance < num_instances; n_instance++) {
-            instance.clear();
-            for (int feature = 0; feature < num_attributes; feature++) {
-                instance.push_back(instances[feature][n_instance]);
-            }
-            aode_.addSample(instance, weights_[n_instance]);
-        }
-        aode_.duration_second += timer.getDuration(); timer.start();
-        // if (debug) aode_.show();
-        aode_.computeProbabilities();
-        aode_.duration_third += timer.getDuration();
-        if (debug) {
-            // std::cout << "* Checking coherence... ";
-            // aode_.checkCoherenceApprox(1e-6);
-            // std::cout << "Ok!" << std::endl;
-            aode_.show();
-            // std::cout << "* Accumulated first time: " << aode_.duration_first << std::endl;
-            // std::cout << "* Accumulated second time: " << aode_.duration_second << std::endl;
-            // std::cout << "* Accumulated third time: " << aode_.duration_third << std::endl;
-            std::cout << "* Time to build the model: " << timert.getDuration() << " seconds" << std::endl;
-            // exit(1);
-        }
+        aode_.fit(X, y, features, className, states, weights_, true);
         fitted = true;
         return *this;
     }
-    std::vector<std::vector<double>> XA1DE::predict_proba(std::vector<std::vector<int>>& test_data)
-    {
-        if (use_threads) {
-            return predict_proba_threads(test_data);
-        }
-        int test_size = test_data[0].size();
-        std::vector<std::vector<double>> probabilities;
-
-        std::vector<int> instance;
-        for (int i = 0; i < test_size; i++) {
-            instance.clear();
-            for (int j = 0; j < (int)test_data.size(); j++) {
-                instance.push_back(test_data[j][i]);
-            }
-            probabilities.push_back(aode_.predict_proba(instance));
-        }
-        return probabilities;
-    }
-    std::vector<std::vector<double>> XA1DE::predict_proba_threads(const std::vector<std::vector<int>>& test_data)
-    {
-        int test_size = test_data[0].size();
-        int sample_size = test_data.size();
-        auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(aode_.statesClass()));
-
-        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
-        std::vector<std::thread> threads;
-        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<std::vector<double>>& predictions) {
-            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
-#if defined(__linux__)
-            pthread_setname_np(pthread_self(), threadName.c_str());
-#else
-            pthread_setname_np(threadName.c_str());
-#endif
-
-            std::vector<int> instance(sample_size);
-            for (int sample = begin; sample < begin + chunk; ++sample) {
-                for (int feature = 0; feature < sample_size; ++feature) {
-                    instance[feature] = samples[feature][sample];
-                }
-                predictions[sample] = aode_.predict_proba(instance);
-            }
-            semaphore_.release();
-        };
-        for (int begin = 0; begin < test_size; begin += chunk_size) {
-            int chunk = std::min(chunk_size, test_size - begin);
-            semaphore_.acquire();
-            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
-        }
-        for (auto& thread : threads) {
-            thread.join();
-        }
-        return probabilities;
-    }
-    std::vector<int> XA1DE::predict(std::vector<std::vector<int>>& test_data)
-    {
-        if (!fitted) {
-            throw std::logic_error(CLASSIFIER_NOT_FITTED);
-        }
-        auto probabilities = predict_proba(test_data);
-        std::vector<int> predictions(probabilities.size(), 0);
-
-        for (size_t i = 0; i < probabilities.size(); i++) {
-            predictions[i] = std::distance(probabilities[i].begin(), std::max_element(probabilities[i].begin(), probabilities[i].end()));
-        }
-
-        return predictions;
-    }
-    float XA1DE::score(std::vector<std::vector<int>>& test_data, std::vector<int>& labels)
-    {
-        aode_.duration_first = 0.0;
-        aode_.duration_second = 0.0;
-        aode_.duration_third = 0.0;
-        Timer timer;
-        timer.start();
-        std::vector<int> predictions = predict(test_data);
-        int correct = 0;
-
-        for (size_t i = 0; i < predictions.size(); i++) {
-            if (predictions[i] == labels[i]) {
-                correct++;
-            }
-        }
-        if (debug) {
-            std::cout << "* Time to predict: " << timer.getDurationString() << std::endl;
-            std::cout << "* Accumulated first time: " << aode_.duration_first << std::endl;
-            std::cout << "* Accumulated second time: " << aode_.duration_second << std::endl;
-            std::cout << "* Accumulated third time: " << aode_.duration_third << std::endl;
-        }
-        return static_cast<float>(correct) / predictions.size();
-    }
-
-    //
-    // statistics
-    //
-    int XA1DE::getNumberOfNodes() const
-    {
-        return aode_.getNumberOfNodes();
-    }
-    int XA1DE::getNumberOfEdges() const
-    {
-        return aode_.getNumberOfEdges();
-    }
-    int XA1DE::getNumberOfStates() const
-    {
-        return aode_.getNumberOfStates();
-    }
-    int XA1DE::getClassNumStates() const
-    {
-        return aode_.statesClass();
-    }
-
     //
     // Fit
     //
-    // fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing)
     XA1DE& XA1DE::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing)
     {
         auto X_ = TensorUtils::to_matrix(X);
@@ -206,71 +36,7 @@ namespace platform {
     }
     XA1DE& XA1DE::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
     {
-        weights_ = TensorUtils::to_vector<double>(weights);
+        weights_ = weights;
         return fit(dataset, features, className, states, smoothing);
     }
-    //
-    // Predict
-    //
-    std::vector<int> XA1DE::predict_spode(std::vector<std::vector<int>>& test_data, int parent)
-    {
-        int test_size = test_data[0].size();
-        int sample_size = test_data.size();
-        auto predictions = std::vector<int>(test_size);
-
-        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
-        std::vector<std::thread> threads;
-        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<int>& predictions) {
-            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
-#if defined(__linux__)
-            pthread_setname_np(pthread_self(), threadName.c_str());
-#else
-            pthread_setname_np(threadName.c_str());
-#endif
-            std::vector<int> instance(sample_size);
-            for (int sample = begin; sample < begin + chunk; ++sample) {
-                for (int feature = 0; feature < sample_size; ++feature) {
-                    instance[feature] = samples[feature][sample];
-                }
-                predictions[sample] = aode_.predict_spode(instance, parent);
-            }
-            semaphore_.release();
-        };
-        for (int begin = 0; begin < test_size; begin += chunk_size) {
-            int chunk = std::min(chunk_size, test_size - begin);
-            semaphore_.acquire();
-            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(predictions));
-        }
-        for (auto& thread : threads) {
-            thread.join();
-        }
-        return predictions;
-    }
-    torch::Tensor XA1DE::predict(torch::Tensor& X)
-    {
-        auto X_ = TensorUtils::to_matrix(X);
-        torch::Tensor y = torch::tensor(predict(X_));
-        return y;
-    }
-    torch::Tensor XA1DE::predict_proba(torch::Tensor& X)
-    {
-        auto X_ = TensorUtils::to_matrix(X);
-        auto probabilities = predict_proba(X_);
-        auto n_samples = X.size(1);
-        int n_classes = probabilities[0].size();
-        auto y = torch::zeros({ n_samples, n_classes });
-        for (int i = 0; i < n_samples; i++) {
-            for (int j = 0; j < n_classes; j++) {
-                y[i][j] = probabilities[i][j];
-            }
-        }
-        return y;
-    }
-    float XA1DE::score(torch::Tensor& X, torch::Tensor& y)
-    {
-        auto X_ = TensorUtils::to_matrix(X);
-        auto y_ = TensorUtils::to_vector<int>(y);
-        return score(X_, y_);
-    }
-
 }
src/experimental_clfs/XA1DE.h
@@ -11,71 +11,24 @@
 #include <cmath>
 #include <algorithm>
 #include <limits>
-#include "bayesnet/BaseClassifier.h"
 #include "common/Timer.hpp"
-#include "CountingSemaphore.hpp"
 #include "Xaode.hpp"
+#include "ExpClf.h"
 
 namespace platform {
-    class XA1DE : public bayesnet::BaseClassifier {
+    class XA1DE : public ExpClf {
     public:
-        XA1DE();
+        XA1DE() = default;
         virtual ~XA1DE() = default;
         XA1DE& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) override;
         XA1DE& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) override;
         XA1DE& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) override;
         XA1DE& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
-        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
-        torch::Tensor predict(torch::Tensor& X) override;
-        torch::Tensor predict_proba(torch::Tensor& X) override;
-        std::vector<int> predict_spode(std::vector<std::vector<int>>& test_data, int parent);
-        std::vector<std::vector<double>> predict_proba_threads(const std::vector<std::vector<int>>& test_data);
-        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
-        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
-        float score(torch::Tensor& X, torch::Tensor& y) override;
-        int getNumberOfNodes() const override;
-        int getNumberOfEdges() const override;
-        int getNumberOfStates() const override;
-        int getClassNumStates() const override;
-        bayesnet::status_t getStatus() const override { return status; }
         std::string getVersion() override { return version; };
-        std::vector<std::string> show() const override { return {}; }
-        std::vector<std::string> topological_order() override { return {}; }
-        std::vector<std::string> getNotes() const override { return notes; }
-        std::string dump_cpt() const override { return ""; }
-        void setHyperparameters(const nlohmann::json& hyperparameters) override;
-        std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
-        void setDebug(bool debug) { this->debug = debug; }
-        std::vector<std::string> graph(const std::string& title = "") const override { return {}; }
-        void set_active_parents(std::vector<int> active_parents) { for (const auto& parent : active_parents) aode_.set_active_parent(parent); }
-        void add_active_parent(int parent) { aode_.set_active_parent(parent); }
-        void remove_last_parent() { aode_.remove_last_parent(); }
     protected:
         void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override {};
 
     private:
-        const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
-        inline void normalize_weights(int num_instances)
-        {
-            double sum = std::accumulate(weights_.begin(), weights_.end(), 0.0);
-            if (sum == 0) {
-                weights_ = std::vector<double>(num_instances, 1.0);
-            } else {
-                for (double& w : weights_) {
-                    w = w * num_instances / sum;
-                }
-            }
-        }
-        Xaode aode_;
-        std::vector<double> weights_;
-        CountingSemaphore& semaphore_;
-        bool debug = false;
-        bayesnet::status_t status = bayesnet::NORMAL;
-        std::vector<std::string> notes;
-        bool use_threads = true;
         std::string version = "1.0.0";
-        bool fitted = false;
     };
 }
 #endif // XA1DE_H
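With everything inherited from ExpClf, using the refactored XA1DE reduces to fit and score. A hypothetical round trip follows; the toy dataset is invented, the states map is a pass-through on this code path, and bayesnet::Smoothing_t::NONE is assumed to be one of the library's smoothing values.

#include "XA1DE.h"
#include <iostream>

int main() {
    // Feature-major layout, X[feature][sample], as fit()/predict() expect.
    std::vector<std::vector<int>> X = { { 0, 1, 0, 1 }, { 1, 1, 0, 0 } };
    std::vector<int> y = { 0, 1, 0, 1 };
    std::vector<std::string> features = { "f0", "f1" };
    std::map<std::string, std::vector<int>> states;  // passed through to Xaode::fit

    platform::XA1DE clf;
    clf.fit(X, y, features, "class", states, bayesnet::Smoothing_t::NONE);
    std::cout << "train accuracy: " << clf.score(X, y) << std::endl;  // fraction correct
    return 0;
}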
src/experimental_clfs/XBAODE.cpp
@@ -11,17 +11,16 @@
 #include "XBAODE.h"
 #include "TensorUtils.hpp"
 #include <loguru.hpp>
-#include <loguru.cpp>
 
 namespace platform {
-    XBAODE::XBAODE() : semaphore_{ CountingSemaphore::getInstance() }, Boost(false)
+    XBAODE::XBAODE() : Boost(false)
     {
-        validHyperparameters = { "alpha_block", "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
+        Boost::validHyperparameters = { "alpha_block", "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
             "predict_voting", "select_features" };
     }
     void XBAODE::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
     {
-        fitted = true;
+        Boost::fitted = true;
         X_train_ = TensorUtils::to_matrix(X_train);
         y_train_ = TensorUtils::to_vector<int>(y_train);
         X_test_ = TensorUtils::to_matrix(X_test);
@@ -40,18 +39,17 @@ namespace platform {
         torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
         bool finished = false;
         std::vector<int> featuresUsed;
-        significanceModels.resize(n, 0.0); // n possible spodes
-        aode_.fit(X_train_, y_train_, features, className, states, smoothing);
+        aode_.fit(X_train_, y_train_, features, className, states, weights_, false);
         n_models = 0;
         if (selectFeatures) {
             featuresUsed = featureSelection(weights_);
-            aode_.set_active_parents(featuresUsed);
+            set_active_parents(featuresUsed);
-            notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
+            Boost::notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
-            auto ypred = aode_.predict(X_train);
+            auto ypred = ExpClf::predict(X_train);
             std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
             // Update significance of the models
             for (const auto& parent : featuresUsed) {
-                significanceModels[parent] = alpha_t;
+                aode_.significance_models[parent] = alpha_t;
             }
             n_models = featuresUsed.size();
             VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
@@ -88,7 +86,7 @@ namespace platform {
         while (counter++ < k && featureSelection.size() > 0) {
             auto feature = featureSelection[0];
             featureSelection.erase(featureSelection.begin());
-            aode_.add_active_parent(feature);
+            add_active_parent(feature);
             alpha_t = 0.0;
             std::vector<int> ypred;
             if (alpha_block) {
@@ -97,16 +95,16 @@ namespace platform {
                 //
                 // Add the model to the ensemble
                 n_models++;
-                significanceModels[feature] = 1.0;
+                aode_.significance_models[feature] = 1.0;
                 aode_.add_active_parent(feature);
                 // Compute the prediction
-                ypred = aode_.predict(X_train_);
+                ypred = ExpClf::predict(X_train_);
                 // Remove the model from the ensemble
-                significanceModels[feature] = 0.0;
+                aode_.significance_models[feature] = 0.0;
                 aode_.remove_last_parent();
                 n_models--;
             } else {
-                ypred = aode_.predict_spode(X_train_, feature);
+                ypred = predict_spode(X_train_, feature);
             }
             // Step 3.1: Compute the classifier amount of say
             auto ypred_t = torch::tensor(ypred);
@@ -115,12 +113,12 @@ namespace platform {
             numItemsPack++;
             featuresUsed.push_back(feature);
             aode_.add_active_parent(feature);
-            significanceModels.push_back(alpha_t);
+            aode_.significance_models[feature] = alpha_t;
             n_models++;
             VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
         } // End of the pack
         if (convergence && !finished) {
-            auto y_val_predict = predict(X_test);
+            auto y_val_predict = ExpClf::predict(X_test);
             double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
             if (priorAccuracy == 0) {
                 priorAccuracy = accuracy;
@@ -148,79 +146,24 @@ namespace platform {
         }
         if (tolerance > maxTolerance) {
             if (numItemsPack < n_models) {
-                notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
+                Boost::notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
                 VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
-                for (int i = 0; i < numItemsPack; ++i) {
-                    significanceModels.pop_back();
-                    models.pop_back();
+                for (int i = static_cast<int>(featuresUsed.size()) - 1; i >= static_cast<int>(featuresUsed.size()) - numItemsPack; --i) {
+                    aode_.remove_last_parent();
+                    aode_.significance_models[featuresUsed[i]] = 0.0;
                     n_models--;
                 }
+                VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %zu features used.", n_models, featuresUsed.size());
             } else {
-                notes.push_back("Convergence threshold reached & 0 models eliminated");
+                Boost::notes.push_back("Convergence threshold reached & 0 models eliminated");
                 VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
             }
         }
         if (featuresUsed.size() != features.size()) {
-            notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
+            Boost::notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
-            status = bayesnet::WARNING;
+            Boost::status = bayesnet::WARNING;
         }
-        notes.push_back("Number of models: " + std::to_string(n_models));
+        Boost::notes.push_back("Number of models: " + std::to_string(n_models));
         return;
     }
-
-    //
-    // Predict
-    //
-    std::vector<std::vector<double>> XBAODE::predict_proba(std::vector<std::vector<int>>& test_data)
-    {
-        return aode_.predict_proba_threads(test_data);
-    }
-    std::vector<int> XBAODE::predict(std::vector<std::vector<int>>& test_data)
-    {
-        if (!fitted) {
-            throw std::logic_error(CLASSIFIER_NOT_FITTED);
-        }
-        return aode_.predict(test_data);
-    }
-    float XBAODE::score(std::vector<std::vector<int>>& test_data, std::vector<int>& labels)
-    {
-        return aode_.score(test_data, labels);
-    }
-
-    //
-    // statistics
-    //
-    int XBAODE::getNumberOfNodes() const
-    {
-        return aode_.getNumberOfNodes();
-    }
-    int XBAODE::getNumberOfEdges() const
-    {
-        return aode_.getNumberOfEdges();
-    }
-    int XBAODE::getNumberOfStates() const
-    {
-        return aode_.getNumberOfStates();
-    }
-    int XBAODE::getClassNumStates() const
-    {
-        return aode_.getClassNumStates();
-    }
-
-    //
-    // Predict
-    //
-    torch::Tensor XBAODE::predict(torch::Tensor& X)
-    {
-        return aode_.predict(X);
-    }
-    torch::Tensor XBAODE::predict_proba(torch::Tensor& X)
-    {
-        return aode_.predict_proba(X);
-    }
-    float XBAODE::score(torch::Tensor& X, torch::Tensor& y)
-    {
-        return aode_.score(X, y);
-    }
-
 }
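trainModel leans on update_weights and alpha_t from bayesnet::Boost, which this diff does not show. For reference, here is a textbook multi-class AdaBoost (SAMME) reweighting that such a routine typically resembles; this is a sketch under that assumption, not the library's actual code, and samme_update is a made-up name.

#include <torch/torch.h>
#include <algorithm>
#include <cmath>
#include <tuple>

std::tuple<torch::Tensor, double, bool> samme_update(const torch::Tensor& y,
                                                     const torch::Tensor& ypred,
                                                     torch::Tensor weights,
                                                     int n_classes)
{
    auto wrong = (y != ypred).to(torch::kFloat64);    // 1.0 where the model erred
    double err = (weights * wrong).sum().item<double>() / weights.sum().item<double>();
    err = std::clamp(err, 1e-12, 1.0 - 1e-12);        // keep the logs finite
    bool finished = err >= 1.0 - 1.0 / n_classes;     // no better than chance -> stop
    double alpha_t = std::log((1.0 - err) / err) + std::log(n_classes - 1.0);
    weights = weights * torch::exp(alpha_t * wrong);  // boost the misclassified samples
    weights = weights / weights.sum();                // renormalize to sum 1
    return { weights, alpha_t, finished };
}

alpha_t (the "amount of say" mentioned in the comment above) is what trainModel stores in aode_.significance_models for each accepted parent.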
src/experimental_clfs/XBAODE.h
@@ -12,52 +12,23 @@
 #include <algorithm>
 #include <limits>
 #include "common/Timer.hpp"
-#include "CountingSemaphore.hpp"
 #include "bayesnet/ensembles/Boost.h"
-#include "XA1DE.h"
+#include "ExpClf.h"
 
 namespace platform {
-    class XBAODE : public bayesnet::Boost {
+    class XBAODE : public bayesnet::Boost, public ExpClf {
     public:
         XBAODE();
         virtual ~XBAODE() = default;
-        const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
-        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
-        torch::Tensor predict(torch::Tensor& X) override;
-        torch::Tensor predict_proba(torch::Tensor& X) override;
-        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
-        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
-        float score(torch::Tensor& X, torch::Tensor& y) override;
-        int getNumberOfNodes() const override;
-        int getNumberOfEdges() const override;
-        int getNumberOfStates() const override;
-        int getClassNumStates() const override;
-        bayesnet::status_t getStatus() const override { return status; }
         std::string getVersion() override { return version; };
-        std::vector<std::string> show() const override { return {}; }
-        std::vector<std::string> topological_order() override { return {}; }
-        std::vector<std::string> getNotes() const override { return notes; }
-        std::string dump_cpt() const override { return ""; }
-        std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
-        void setDebug(bool debug) { this->debug = debug; }
-        std::vector<std::string> graph(const std::string& title = "") const override { return {}; }
-        void set_active_parents(std::vector<int> active_parents) { aode_.set_active_parents(active_parents); }
     protected:
         void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
     private:
         std::vector<std::vector<int>> X_train_, X_test_;
         std::vector<int> y_train_, y_test_;
         torch::Tensor dataset;
-        XA1DE aode_;
         int n_models;
-        std::vector<double> weights_;
-        CountingSemaphore& semaphore_;
-        bool debug = false;
-        bayesnet::status_t status = bayesnet::NORMAL;
-        std::vector<std::string> notes;
-        bool use_threads = true;
         std::string version = "0.9.7";
-        bool fitted = false;
     };
 }
 #endif // XBAODE_H
src/experimental_clfs/Xaode.hpp
@@ -16,6 +16,7 @@
 #include <string>
 #include <cmath>
 #include <limits>
+#include <torch/torch.h>
 
 namespace platform {
     class Xaode {
@@ -28,11 +29,49 @@ namespace platform {
             COUNTS,
             PROBS
         };
-        double duration_first = 0.0;
-        double duration_second = 0.0;
-        double duration_third = 0.0;
+        std::vector<double> significance_models;
         Xaode() : nFeatures_{ 0 }, statesClass_{ 0 }, matrixState_{ MatrixState::EMPTY } {}
         // -------------------------------------------------------
+        // fit
+        // -------------------------------------------------------
+        //
+        // Classifiers interface
+        // The all_parents parameter decides whether the model starts with every parent active or none of them
+        //
+        void fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bool all_parents)
+        {
+            int num_instances = X[0].size();
+            int n_features_ = X.size();
+
+            significance_models.resize(n_features_, (all_parents ? 1.0 : 0.0));
+            std::vector<int> statesv;
+            for (int i = 0; i < n_features_; i++) {
+                if (all_parents) active_parents.push_back(i);
+                statesv.push_back(*max_element(X[i].begin(), X[i].end()) + 1);
+            }
+            statesv.push_back(*max_element(y.begin(), y.end()) + 1);
+            // Size the tables from the observed cardinalities, then accumulate one
+            // weighted sample per instance and normalize the counts into probabilities.
+            init(statesv);
+            std::vector<int> instance(n_features_ + 1);
+            for (int n_instance = 0; n_instance < num_instances; n_instance++) {
+                for (int feature = 0; feature < n_features_; feature++) {
+                    instance[feature] = X[feature][n_instance];
+                }
+                instance[n_features_] = y[n_instance];
+                addSample(instance, weights[n_instance].item<double>());
+            }
+            computeProbabilities();
+        }
+        // -------------------------------------------------------
         // init
         // -------------------------------------------------------
         //
@@ -406,7 +445,7 @@ namespace platform {
         {
             return (nFeatures_ + 1) * nFeatures_;
         }
-        void set_active_parent(int active_parent)
+        void add_active_parent(int active_parent)
         {
             active_parents.push_back(active_parent);
         }
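The new fit() turns Xaode into a self-contained engine: it infers the state counts, accumulates weighted samples, and normalizes. A minimal sketch of driving it directly, outside any classifier wrapper (the toy data is invented, and features/className/states are pass-throughs on this code path):

#include "Xaode.hpp"
#include <torch/torch.h>
#include <map>
#include <string>
#include <vector>

int main() {
    // Feature-major: 2 features x 4 samples, plus the labels.
    std::vector<std::vector<int>> X = { { 0, 1, 0, 1 }, { 1, 1, 0, 0 } };
    std::vector<int> y = { 0, 1, 0, 1 };
    std::vector<std::string> features = { "f0", "f1" };
    std::map<std::string, std::vector<int>> states;
    torch::Tensor weights = torch::full({ 4 }, 1.0, torch::kFloat64);

    platform::Xaode aode;
    aode.fit(X, y, features, "class", states, weights, true);  // all parents active
    std::vector<int> first_sample = { X[0][0], X[1][0] };
    auto proba = aode.predict_proba(first_sample);             // P(class | sample)
    return proba.empty() ? 1 : 0;
}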