// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
// Based on Geoffrey I. Webb's A1DE Java algorithm
// https://weka.sourceforge.io/packageMetaData/AnDE/Latest.html
#ifndef XAODE_H
#define XAODE_H
#include <torch/torch.h>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <numeric>
#include <stdexcept>
#include <cmath>
#include <iostream>

namespace platform {
    class Xaode {
    public:
        // -------------------------------------------------------
        // The Xaode can be EMPTY (just created), in COUNTS mode (accumulating raw counts)
        // or PROBS mode (storing conditional probabilities).
        enum class MatrixState {
            EMPTY, COUNTS, PROBS
        };
        std::vector<double> significance_models;
        Xaode() : nFeatures_{ 0 }, statesClass_{ 0 }, matrixState_{ MatrixState::EMPTY } {}
        // -------------------------------------------------------
        // fit
        // -------------------------------------------------------
        //
        // Classifiers interface.
        // The 'all_parents' parameter decides whether the model starts with every
        // parent active or with none of them.
        //
        void fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bool all_parents)
        {
            int num_instances = X[0].size();
            int n_features_ = X.size();
            significance_models.resize(n_features_, (all_parents ? 1.0 : 0.0));
            std::vector<int> statesv;
            for (int i = 0; i < n_features_; i++) {
                if (all_parents) active_parents.push_back(i);
                statesv.push_back(*max_element(X[i].begin(), X[i].end()) + 1);
            }
            statesv.push_back(*max_element(y.begin(), y.end()) + 1);
            // std::cout << "* States: " << statesv << std::endl;
            // std::cout << "* Weights: " << weights << std::endl;
            // std::cout << "* Instances: " << num_instances << std::endl;
            // std::cout << "* Attributes: " << n_features_ + 1 << std::endl;
            // std::cout << "* y: " << y << std::endl;
            // std::cout << "* x shape: " << X.size() << "x" << X[0].size() << std::endl;
            // for (int i = 0; i < n_features_; i++) {
            //     std::cout << "* " << features[i] << ": " << instances[i] << std::endl;
            // }
            // std::cout << "Starting to build the model" << std::endl;
            init(statesv);
            std::vector<int> instance(n_features_ + 1);
            for (int n_instance = 0; n_instance < num_instances; n_instance++) {
                for (int feature = 0; feature < n_features_; feature++) {
                    instance[feature] = X[feature][n_instance];
                }
                instance[n_features_] = y[n_instance];
                addSample(instance, weights[n_instance].item<double>());
            }
            computeProbabilities();
        }
        // -------------------------------------------------------
        // init
        // -------------------------------------------------------
        //
        // states.size() = nFeatures + 1,
        // where states.back() = number of class states.
        //
        // We'll store:
        //  1) smoothed p(x_i=si, c) estimates in classFeatureProbs_ (the class
        //     prior is folded in; see computeProbabilities)
        //  2) p(x_i=si | c, x_j=sj) in data_ and p(x_j=sj | c, x_i=si) in dataOpp_,
        //     for every pair i > j; each pair is stored once, keyed by the
        //     higher-indexed feature i (the storage "superparent").
        //
        // Internally, in COUNTS mode, data_ accumulates raw counts, then
        // computeProbabilities(...) normalizes them into conditionals.
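        //
        // Worked layout example (added note; the numbers are illustrative and the
        // offset scheme is the one reconstructed in init() below): two features
        // f0 (2 states) and f1 (3 states) plus a 2-state class give
        //     featureClassOffset_ = [0, 2]
        //     pairOffset_         = [0, 0, 0, 2, 4]   // slots: f0 -> 0,1 ; f1 -> 2,3,4
        //     data_/dataOpp_ size = 6 * statesClass_ = 12
        // and the entry for (x1=s1, x0=s0, c) lives at
        //     idx = (pairOffset_[2 + s1] + s0) * statesClass_ + c
        // e.g. (s1=2, s0=1, c=1) => (4 + 1) * 2 + 1 = 11.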
        //
        void init(const std::vector<int>& states)
        {
            //
            // Check valid input data
            //
            if (matrixState_ != MatrixState::EMPTY) {
                throw std::logic_error("Xaode: already initialized.");
            }
            states_ = states;
            nFeatures_ = static_cast<int>(states_.size()) - 1;
            if (nFeatures_ < 1) {
                throw std::invalid_argument("Xaode: need at least 1 feature plus class states.");
            }
            statesClass_ = states_.back();
            if (statesClass_ <= 0) {
                throw std::invalid_argument("Xaode: class states must be > 0.");
            }
            //
            // Initialize data structures
            //
            active_parents.resize(nFeatures_);
            int totalStates = std::accumulate(states.begin(), states.end(), 0) - statesClass_;
            // For p(x_i=si | c), we store them in a 1D array classFeatureProbs_ after we compute.
            // We'll need the offsets for each feature i in featureClassOffset_.
            featureClassOffset_.resize(nFeatures_);
            // We'll store p(x_child=sj | c, x_sp=si) for each pair (i > j), flattened
            // into data_/dataOpp_; pairOffset_ maps every (feature, state) slot of the
            // higher-indexed feature of a pair to the start of its block.
            // [The rest of this method was reconstructed from the indexing used in
            //  addSample() and computeProbabilities().]
            pairOffset_.resize(totalStates);
            int feature_offset = 0;  // running sum of state counts => featureClassOffset_
            int runningOffset = 0;   // total states of all features before the current one
            int slot = 0, index = 0;
            for (int i = 0; i < nFeatures_; ++i) {
                featureClassOffset_[i] = feature_offset;
                feature_offset += states[i];
                for (int s = 0; s < states[i]; ++s) {
                    // each (i, si) slot owns one entry per state of every feature j < i
                    pairOffset_[slot++] = index;
                    index += runningOffset;
                }
                runningOffset += states[i];
            }
            data_.clear();
            data_.resize(index * statesClass_, 0.0);
            dataOpp_.clear();
            dataOpp_.resize(index * statesClass_, 0.0);
            classFeatureCounts_.resize(feature_offset * statesClass_, 0.0);
            classFeatureProbs_.resize(feature_offset * statesClass_, 0.0);
            classCounts_.resize(statesClass_, 0.0);
            matrixState_ = MatrixState::COUNTS;
        }
        // -------------------------------------------------------
        // addSample
        // -------------------------------------------------------
        //
        // 'instance' holds the feature values with the class appended:
        // instance.size() == nFeatures_ + 1.
        //
        void addSample(const std::vector<int>& instance, double weight)
        {
            //
            // (A) increment classCounts_
            // (B) increment feature-class counts => for p(x_i|c)
            // (C) increment pair (superparent=i, child=j) counts => data_
            //
            // if (matrixState_ != MatrixState::COUNTS) {
            //     throw std::logic_error("addSample: not in COUNTS mode.");
            // }
            // if (static_cast<int>(instance.size()) != nFeatures_ + 1) {
            //     throw std::invalid_argument("addSample: instance.size() must be nFeatures_ + 1.");
            // }
            int c = instance.back();
            // if (c < 0 || c >= statesClass_) {
            //     throw std::out_of_range("addSample: class index out of range.");
            // }
            if (weight <= 0.0) {
                return;
            }
            // (A) increment classCounts_
            classCounts_[c] += weight;
            // (B,C)
            // We'll store raw counts now and turn them into p(child | c, superparent) later.
            int idx, fcIndex, si, sj, i_offset;
            for (int i = 0; i < nFeatures_; ++i) {
                si = instance[i];
                // (B) increment feature-class counts => for p(x_i|c)
                fcIndex = (featureClassOffset_[i] + si) * statesClass_ + c;
                classFeatureCounts_[fcIndex] += weight;
                // (C) increment pair (superparent=i, child=j) counts => data_
                i_offset = pairOffset_[featureClassOffset_[i] + si];
                for (int j = 0; j < i; ++j) {
                    sj = instance[j];
                    idx = (i_offset + featureClassOffset_[j] + sj) * statesClass_ + c;
                    data_[idx] += weight;
                }
            }
        }
        // -------------------------------------------------------
        // computeProbabilities
        // -------------------------------------------------------
        //
        // Once all samples are added in COUNTS mode, call this to:
        //  1) compute the smoothed p(x_i=si, c) estimates => classFeatureProbs_
        //  2) compute p(x_i=si | c, x_j=sj) => data_ and
        //     p(x_j=sj | c, x_i=si) => dataOpp_ (for every pair i > j)
        //
        void computeProbabilities()
        {
            if (matrixState_ != MatrixState::COUNTS) {
                throw std::logic_error("computeProbabilities: must be in COUNTS mode.");
            }
            double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
            // (1) p(x_i=si, c) => classFeatureProbs_
            int idx, sf;
            double denom, countVal, p;
            for (int feature = 0; feature < nFeatures_; ++feature) {
                sf = states_[feature];
                for (int c = 0; c < statesClass_; ++c) {
                    denom = classCounts_[c] * sf;
                    if (denom <= 0.0) {
                        // fallback => uniform
                        for (int sf_value = 0; sf_value < sf; ++sf_value) {
                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
                            classFeatureProbs_[idx] = 1.0 / sf;
                        }
                    } else {
                        for (int sf_value = 0; sf_value < sf; ++sf_value) {
                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
                            countVal = classFeatureCounts_[idx];
                            p = (countVal + SMOOTHING / (statesClass_ * states_[feature])) / (totalCount + SMOOTHING);
                            classFeatureProbs_[idx] = p;
                        }
                    }
                }
            }
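            // Added note (not in the original source): with m = SMOOTHING, the
            // estimate above is an m-estimate of the JOINT p(x_i=si, c),
            //
            //     p = (N(x_i=si, c) + m / (|C| * |X_i|)) / (N + m),
            //
            // because the denominator is the total weight N, not the per-class
            // weight N(c). The class prior p(c) is thus folded into
            // classFeatureProbs_, which is why the SPODE products below never
            // multiply by p(c) separately.
            // Example: N=100, m=1, |C|=2, |X_i|=2, N(x_i=si, c)=30
            //          => p = (30 + 0.25) / 101 ≈ 0.2995.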
            // (2) p(x_i=si | c, x_j=sj) => data_(i,si,j,sj,c)    for i > j
            //     p(x_j=sj | c, x_i=si) => dataOpp_(i,si,j,sj,c)
            double pccCount, pcCount, ccCount;
            double conditionalProb, oppositeCondProb;
            int part1, part2, p1, part2_class, p1_class;
            for (int parent = nFeatures_ - 1; parent >= 0; --parent) {
                for (int sp = 0; sp < states_[parent]; ++sp) {
                    p1 = featureClassOffset_[parent] + sp;
                    part1 = pairOffset_[p1];
                    p1_class = p1 * statesClass_;
                    for (int child = parent - 1; child >= 0; --child) {
                        for (int sc = 0; sc < states_[child]; ++sc) {
                            part2 = featureClassOffset_[child] + sc;
                            part2_class = part2 * statesClass_;
                            for (int c = 0; c < statesClass_; c++) {
                                idx = (part1 + part2) * statesClass_ + c;
                                // Parent, Child, Class count
                                pccCount = data_[idx];
                                // Parent, Class count
                                pcCount = classFeatureCounts_[p1_class + c];
                                // Child, Class count
                                ccCount = classFeatureCounts_[part2_class + c];
                                // P(x_parent | c, x_child)
                                conditionalProb = (pccCount + SMOOTHING / states_[parent]) / (ccCount + SMOOTHING);
                                data_[idx] = conditionalProb;
                                // P(x_child | c, x_parent)
                                oppositeCondProb = (pccCount + SMOOTHING / states_[child]) / (pcCount + SMOOTHING);
                                dataOpp_[idx] = oppositeCondProb;
                            }
                        }
                    }
                }
            }
            matrixState_ = MatrixState::PROBS;
        }
        // -------------------------------------------------------
        // predict_proba_spode
        // -------------------------------------------------------
        //
        // Single-superparent approach:
        //   P(c | x) ∝ p(c) * p(x_sp | c) * ∏_{i≠sp} p(x_i | c, x_sp)
        //
        // 'instance' should have size == nFeatures_ (no class).
        // 'parent' (the superparent sp) is in [0..nFeatures_).
        // We multiply p(c) * p(x_sp | c) * p(x_i | c, x_sp), then
        // normalize the distribution.
        //
        std::vector<double> predict_proba_spode(const std::vector<int>& instance, int parent)
        {
            // accumulates the posterior probability of each class for this SPODE
            auto spodeProbs = std::vector<double>(statesClass_);
            // Initialize with the superparent's feature|class probabilities
            // (they already carry the class prior, so p(c) is not multiplied again)
            int sp = instance[parent];
            int localOffset = (featureClassOffset_[parent] + sp) * statesClass_;
            for (int c = 0; c < statesClass_; ++c) {
                spodeProbs[c] = classFeatureProbs_[localOffset + c];
            }
            int base, sc, offset;
            for (int child = 0; child < nFeatures_; ++child) {
                if (child == parent) {
                    continue;
                }
                sc = instance[child];
                /*
                 * For a stored pair (i > j), data_ holds P(x_i | c, x_j) and
                 * dataOpp_ holds P(x_j | c, x_i); the factor P(x_child | c, x_sp)
                 * therefore comes from dataOpp_ when child < parent and from
                 * data_ when child > parent.
                 */
                if (child < parent) {
                    offset = pairOffset_[featureClassOffset_[parent] + sp];
                    base = (offset + featureClassOffset_[child] + sc) * statesClass_;
                    for (int c = 0; c < statesClass_; ++c) {
                        spodeProbs[c] *= dataOpp_[base + c];
                    }
                } else {
                    offset = pairOffset_[featureClassOffset_[child] + sc];
                    base = (offset + featureClassOffset_[parent] + sp) * statesClass_;
                    for (int c = 0; c < statesClass_; ++c) {
                        spodeProbs[c] *= data_[base + c];
                    }
                }
            }
            // Normalize the probabilities
            normalize(spodeProbs);
            return spodeProbs;
        }
        int predict_spode(const std::vector<int>& instance, int parent)
        {
            auto probs = predict_proba_spode(instance, parent);
            return (int)std::distance(probs.begin(), std::max_element(probs.begin(), probs.end()));
        }
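        // -------------------------------------------------------
        // predict_proba
        // -------------------------------------------------------
        //
        // Added note (restating the AnDE averaging as this implementation applies
        // it): the ensemble posterior sums the SPODE estimate of every active
        // superparent,
        //
        //   P(c | x) ∝ Σ_{sp ∈ active_parents} p(c) · p(x_sp | c) · ∏_{i≠sp} p(x_i | c, x_sp)
        //
        // Each stored pair (parent > child) is visited once below and feeds two
        // SPODEs at the same time: data_ contributes P(x_parent | c, x_child) to
        // the SPODE whose superparent is 'child', and dataOpp_ contributes
        // P(x_child | c, x_parent) to the SPODE whose superparent is 'parent'.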
        std::vector<double> predict_proba(const std::vector<int>& instance)
        {
            // accumulates posterior probabilities for each class
            auto probs = std::vector<double>(statesClass_);
            auto spodeProbs = std::vector<std::vector<double>>(nFeatures_, std::vector<double>(statesClass_));
            // Initialize the probabilities with the feature|class probabilities
            int localOffset;
            for (int feature = 0; feature < nFeatures_; ++feature) {
                // if feature is not in the active_parents, skip it
                if (std::find(active_parents.begin(), active_parents.end(), feature) == active_parents.end()) {
                    continue;
                }
                localOffset = (featureClassOffset_[feature] + instance[feature]) * statesClass_;
                for (int c = 0; c < statesClass_; ++c) {
                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c];
                }
            }
            int idx, base, sp, sc, parent_offset;
            for (int parent = 1; parent < nFeatures_; ++parent) {
                // if parent is not in the active_parents, skip it
                if (std::find(active_parents.begin(), active_parents.end(), parent) == active_parents.end()) {
                    continue;
                }
                sp = instance[parent];
                parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
                for (int child = 0; child < parent; ++child) {
                    sc = instance[child];
                    base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
                    for (int c = 0; c < statesClass_; ++c) {
                        /*
                         * The probability P(xc|xp,c) is stored in dataOpp_, and
                         * the probability P(xp|xc,c) is stored in data_.
                         */
                        idx = base + c;
                        spodeProbs[child][c] *= data_[idx];
                        spodeProbs[parent][c] *= dataOpp_[idx];
                    }
                }
            }
            // add all the probabilities for each class
            for (int c = 0; c < statesClass_; ++c) {
                for (int i = 0; i < nFeatures_; ++i) {
                    probs[c] += spodeProbs[i][c];
                }
            }
            // Normalize the probabilities
            normalize(probs);
            return probs;
        }
        void normalize(std::vector<double>& probs) const
        {
            double sum = 0;
            for (double d : probs) {
                sum += d;
            }
            if (std::isnan(sum)) {
                throw std::runtime_error("Can't normalize array. Sum is NaN.");
            }
            if (sum == 0) {
                return;
            }
            for (int i = 0; i < (int)probs.size(); i++) {
                probs[i] /= sum;
            }
        }
        int statesClass() const { return statesClass_; }
        int nFeatures() const { return nFeatures_; }
        int getNumberOfStates() const { return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_; }
        int getNumberOfEdges() const { return nFeatures_ * (2 * nFeatures_ - 1); }
        int getNumberOfNodes() const { return (nFeatures_ + 1) * nFeatures_; }
        void add_active_parent(int active_parent) { active_parents.push_back(active_parent); }
        void remove_last_parent() { active_parents.pop_back(); }
    private:
        // -----------
        // MEMBER DATA
        // -----------
        std::vector<int> states_; // [states_feat0, ..., states_feat(n-1), statesClass_]
        int nFeatures_;
        int statesClass_;
        // pairOffset_ maps every (feature, state) slot of the higher-indexed feature
        // of a pair to the start of its block inside data_/dataOpp_.
        std::vector<int> pairOffset_;
        // In COUNTS mode data_ accumulates raw pair counts; after computeProbabilities()
        // it stores p(x_i=si | c, x_j=sj) for each pair (i > j).
        std::vector<double> data_;
        // dataOpp_ stores p(x_j=sj | c, x_i=si) for each pair (i > j).
        std::vector<double> dataOpp_;
        // classCounts_[c] = (weighted) number of instances of class c
        std::vector<double> classCounts_;
        // For p(x_i=si | c), we store counts in classFeatureCounts_ => offset by featureClassOffset_[i]
        std::vector<int> featureClassOffset_;
        std::vector<double> classFeatureCounts_;
        std::vector<double> classFeatureProbs_; // smoothed p(x_i=si, c) after computeProbabilities()
        MatrixState matrixState_;
        double SMOOTHING = 1.0;
        std::vector<int> active_parents;
    };
}
#endif // XAODE_H
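// ---------------------------------------------------------------
// Usage sketch (illustrative only; the toy data and variable names
// are assumptions, not part of this header):
//
//     platform::Xaode model;
//     std::vector<std::vector<int>> X = { { 0, 1, 1, 0 },      // feature 0 (2 states)
//                                         { 2, 0, 1, 2 } };    // feature 1 (3 states)
//     std::vector<int> y = { 0, 1, 1, 0 };                     // 2 classes
//     std::vector<std::string> features = { "f0", "f1" };
//     std::map<std::string, std::vector<int>> states;          // not used by fit()
//     auto weights = torch::ones({ 4 }, torch::kDouble);
//     model.fit(X, y, features, "class", states, weights, true);
//     auto probs = model.predict_proba({ 1, 2 });              // P(c | x) per class
//     int pred = model.predict_spode({ 1, 2 }, 1);             // single SPODE, sp = f1
// ---------------------------------------------------------------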