From c1531dba2abcc6e7ec36403564489128dd58ea67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 19 Feb 2025 11:40:33 +0100 Subject: [PATCH] Complete XA1DE integration --- src/experimental_clfs/XA1DE.cpp | 67 +++++++++++++++++++++++++++++++-- src/experimental_clfs/XA1DE.h | 24 ++++++------ src/experimental_clfs/Xaode.hpp | 33 ++++++++++------ 3 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/experimental_clfs/XA1DE.cpp b/src/experimental_clfs/XA1DE.cpp index 8f1f9b0..f8c2849 100644 --- a/src/experimental_clfs/XA1DE.cpp +++ b/src/experimental_clfs/XA1DE.cpp @@ -27,6 +27,7 @@ namespace platform { Timer timer, timert; timer.start(); timert.start(); + // debug = true; std::vector> instances = X; instances.push_back(y); int num_instances = instances[0].size(); @@ -36,6 +37,16 @@ namespace platform { for (int i = 0; i < num_attributes; i++) { statesv.push_back(*max_element(instances[i].begin(), instances[i].end()) + 1); } + // std::cout << "* States: " << statesv << std::endl; + // std::cout << "* Weights: " << weights_ << std::endl; + // std::cout << "* Instances: " << num_instances << std::endl; + // std::cout << "* Attributes: " << num_attributes << std::endl; + // std::cout << "* y: " << y << std::endl; + // std::cout << "* x shape: " << X.size() << "x" << X[0].size() << std::endl; + // for (int i = 0; i < num_attributes - 1; i++) { + // std::cout << "* " << features[i] << ": " << instances[i] << std::endl; + // } + // std::cout << "Starting to build the model" << std::endl; aode_.init(statesv); aode_.duration_first += timer.getDuration(); timer.start(); std::vector instance; @@ -54,7 +65,7 @@ namespace platform { // std::cout << "* Checking coherence... "; // aode_.checkCoherenceApprox(1e-6); // std::cout << "Ok!" << std::endl; - // aode_.show(); + aode_.show(); // std::cout << "* Accumulated first time: " << aode_.duration_first << std::endl; // std::cout << "* Accumulated second time: " << aode_.duration_second << std::endl; // std::cout << "* Accumulated third time: " << aode_.duration_third << std::endl; @@ -196,6 +207,26 @@ namespace platform { return data; } + // + // statistics + // + int XA1DE::getNumberOfNodes() const + { + return aode_.getNumberOfNodes(); + } + int XA1DE::getNumberOfEdges() const + { + return aode_.getNumberOfEdges(); + } + int XA1DE::getNumberOfStates() const + { + return aode_.getNumberOfStates(); + } + int XA1DE::getClassNumStates() const + { + return aode_.statesClass(); + } + // // Fit // @@ -203,8 +234,7 @@ namespace platform { XA1DE& XA1DE::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const bayesnet::Smoothing_t smoothing) { auto X_ = to_matrix(X); - int a = 1; - std::vector y_ = to_vector(y); + auto y_ = to_vector(y); return fit(X_, y_, features, className, states, smoothing); } XA1DE& XA1DE::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const bayesnet::Smoothing_t smoothing) @@ -215,8 +245,37 @@ namespace platform { } XA1DE& XA1DE::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) { - double b = 1; weights_ = to_vector(weights); return fit(dataset, features, className, states, smoothing); } + // + // Predict + // + torch::Tensor XA1DE::predict(torch::Tensor& X) + { + auto X_ = to_matrix(X); + torch::Tensor y = torch::tensor(predict(X_)); + return y; + } + torch::Tensor XA1DE::predict_proba(torch::Tensor& X) + { + auto X_ = to_matrix(X); + auto probabilities = predict_proba(X_); + auto n_samples = X.size(1); + int n_classes = probabilities[0].size(); + auto y = torch::zeros({ n_samples, n_classes }); + for (int i = 0; i < n_samples; i++) { + for (int j = 0; j < n_classes; j++) { + y[i][j] = probabilities[i][j]; + } + } + return y; + } + float XA1DE::score(torch::Tensor& X, torch::Tensor& y) + { + auto X_ = to_matrix(X); + auto y_ = to_vector(y); + return score(X_, y_); + } + } \ No newline at end of file diff --git a/src/experimental_clfs/XA1DE.h b/src/experimental_clfs/XA1DE.h index 1cfc371..f50bb45 100644 --- a/src/experimental_clfs/XA1DE.h +++ b/src/experimental_clfs/XA1DE.h @@ -24,23 +24,23 @@ namespace platform { virtual ~XA1DE() = default; const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; - std::vector> predict_proba_threads(const std::vector>& test_data); - std::vector> predict_proba(std::vector>& X) override; - float score(std::vector>& X, std::vector& y) override; - std::vector predict(std::vector>& X) override; XA1DE& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const bayesnet::Smoothing_t smoothing) override; XA1DE& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const bayesnet::Smoothing_t smoothing) override; XA1DE& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const bayesnet::Smoothing_t smoothing) override; XA1DE& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override; - torch::Tensor predict(torch::Tensor& X) override { return torch::zeros(0); }; - torch::Tensor predict_proba(torch::Tensor& X) override { return torch::zeros(0); }; - int getNumberOfNodes() const override { return 0; }; - int getNumberOfEdges() const override { return 0; }; - int getNumberOfStates() const override { return 0; }; - int getClassNumStates() const override { return 0; }; + std::vector predict(std::vector>& X) override; + torch::Tensor predict(torch::Tensor& X) override; + torch::Tensor predict_proba(torch::Tensor& X) override; + std::vector> predict_proba_threads(const std::vector>& test_data); + std::vector> predict_proba(std::vector>& X) override; + float score(std::vector>& X, std::vector& y) override; + float score(torch::Tensor& X, torch::Tensor& y) override; + int getNumberOfNodes() const override; + int getNumberOfEdges() const override; + int getNumberOfStates() const override; + int getClassNumStates() const override; bayesnet::status_t getStatus() const override { return status; } std::string getVersion() override { return version; }; - float score(torch::Tensor& X, torch::Tensor& y) override { return 0; }; std::vector show() const override { return {}; } std::vector topological_order() override { return {}; } std::vector getNotes() const override { return notes; } @@ -57,7 +57,7 @@ namespace platform { { double sum = std::accumulate(weights_.begin(), weights_.end(), 0.0); if (sum == 0) { - weights_ = std::vector(weights_.size(), 1.0); + weights_ = std::vector(num_instances, 1.0); } else { for (double& w : weights_) { w = w * num_instances / sum; diff --git a/src/experimental_clfs/Xaode.hpp b/src/experimental_clfs/Xaode.hpp index 5093853..b11d20a 100644 --- a/src/experimental_clfs/Xaode.hpp +++ b/src/experimental_clfs/Xaode.hpp @@ -31,7 +31,7 @@ namespace platform { double duration_first = 0.0; double duration_second = 0.0; double duration_third = 0.0; - Xaode() : nFeatures_{ 0 }, statesClass_{ 0 }, totalSize_{ 0 }, matrixState_{ MatrixState::EMPTY } {} + Xaode() : nFeatures_{ 0 }, statesClass_{ 0 }, matrixState_{ MatrixState::EMPTY } {} // ------------------------------------------------------- // init // ------------------------------------------------------- @@ -84,9 +84,9 @@ namespace platform { } runningOffset += states_[i]; } - totalSize_ = index * statesClass_; - data_.resize(totalSize_); - dataOpp_.resize(totalSize_); + int totalSize = index * statesClass_; + data_.resize(totalSize); + dataOpp_.resize(totalSize); classFeatureCounts_.resize(feature_offset * statesClass_); classFeatureProbs_.resize(feature_offset * statesClass_); @@ -98,12 +98,6 @@ namespace platform { matrixState_ = MatrixState::COUNTS; } - // Returns the dimension of data_ (just for info). - int size() const - { - return totalSize_; - } - // Returns current mode: INIT, COUNTS or PROBS MatrixState state() const { @@ -116,7 +110,6 @@ namespace platform { std::cout << "-------- Xaode.show() --------" << std::endl << "- nFeatures = " << nFeatures_ << std::endl << "- statesClass = " << statesClass_ << std::endl - << "- totalSize_ = " << totalSize_ << std::endl << "- matrixState = " << (matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS") << std::endl; std::cout << "- states: size: " << states_.size() << std::endl; for (int s : states_) std::cout << s << " "; std::cout << std::endl; @@ -543,6 +536,23 @@ namespace platform { { return statesClass_; } + int nFeatures() const + { + return nFeatures_; + } + int getNumberOfStates() const + { + return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_; + } + int getNumberOfEdges() const + { + return nFeatures_ * (2 * nFeatures_ - 1); + } + int getNumberOfNodes() const + { + return (nFeatures_ + 1) * nFeatures_; + } + private: // ----------- @@ -555,7 +565,6 @@ namespace platform { // data_ means p(child=sj | c, superparent= si) after normalization. // But in COUNTS mode, it accumulates raw counts. std::vector pairOffset_; - int totalSize_; // data_ stores p(child=sj | c, superparent=si) for each pair (i data_; // dataOpp_ stores p(superparent=si | c, child=sj) for each pair (i