diff --git a/src/experimental_clfs/XBAODE.cpp b/src/experimental_clfs/XBAODE.cpp index f940f61..dc7d653 100644 --- a/src/experimental_clfs/XBAODE.cpp +++ b/src/experimental_clfs/XBAODE.cpp @@ -9,6 +9,7 @@ #include #include #include "XBAODE.h" +#include "XSpode.hpp" #include "TensorUtils.hpp" #include @@ -86,7 +87,8 @@ namespace platform { while (counter++ < k && featureSelection.size() > 0) { auto feature = featureSelection[0]; featureSelection.erase(featureSelection.begin()); - add_active_parent(feature); + auto model = XSpode(feature); + model.fit(X_train_, y_train_, weights_); alpha_t = 0.0; std::vector ypred; if (alpha_block) { @@ -104,7 +106,7 @@ namespace platform { aode_.remove_last_parent(); n_models--; } else { - ypred = predict_spode(X_train_, feature); + ypred = model.predict(X_train_); } // Step 3.1: Compute the classifier amout of say auto ypred_t = torch::tensor(ypred); diff --git a/src/experimental_clfs/XSpode.hpp b/src/experimental_clfs/XSpode.hpp new file mode 100644 index 0000000..1f1c10c --- /dev/null +++ b/src/experimental_clfs/XSpode.hpp @@ -0,0 +1,394 @@ +#ifndef XSPODE_H +#define XSPODE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace platform { + + class XSpode { + public: + // -------------------------------------- + // The SPODE can be EMPTY (just created), + // in COUNTS mode (accumulating raw counts), + // or in PROBS mode (storing conditional probabilities). + // -------------------------------------- + enum class MatrixState { + EMPTY, + COUNTS, + PROBS + }; + + // -------------------------------------- + // Constructor + // + // Supply which feature index is the single super-parent (“spIndex”). 
+        // --------------------------------------
+        XSpode(int spIndex)
+            : superParent_{ spIndex },
+            nFeatures_{ 0 },
+            statesClass_{ 0 },
+            matrixState_{ MatrixState::EMPTY },
+            alpha_{ 1.0 },
+            initializer_{ 1.0 }
+        {
+        }
+
+        // --------------------------------------
+        // fit
+        // --------------------------------------
+        //
+        // Trains the SPODE given data:
+        //  X: X[f][n] is the f-th feature value for instance n (feature-major)
+        //  y: y[n] is the class value for instance n
+        //  weights: per-instance weights, indexed as weights[n]; pass 1.0 for
+        //           every instance when weighting is not used.
+        //
+        // The number of states of each feature and of the class is derived from
+        // the data itself as (max value + 1), so values must be non-negative
+        // integers.
+        //
+        // We only store conditional probabilities for:
+        //  p(x_sp| c) (the super-parent feature)
+        //  p(x_child| c, x_sp) for all child ≠ sp
+        //
+        // Calling fit again discards everything learned by a previous call.
+        //
+        // Throws std::invalid_argument when X or y is empty, when the
+        // super-parent index is not a valid feature index, when a feature row
+        // and y differ in length, when a negative value is found, or when
+        // there are fewer weights than instances.
+        // --------------------------------------
+        void fit(const std::vector<std::vector<int>>& X,
+            const std::vector<int>& y,
+            const torch::Tensor& weights)
+        {
+            const int numInstances = static_cast<int>(y.size());
+            nFeatures_ = static_cast<int>(X.size());
+
+            // Validate up front: every check below guards an out-of-bounds
+            // access or a division by zero further down.
+            if (nFeatures_ == 0 || numInstances == 0) {
+                throw std::invalid_argument("XSpode::fit: X and y must not be empty.");
+            }
+            if (superParent_ < 0 || superParent_ >= nFeatures_) {
+                throw std::invalid_argument("XSpode::fit: super-parent index out of range.");
+            }
+            if (weights.numel() < numInstances) {
+                throw std::invalid_argument("XSpode::fit: fewer weights than instances.");
+            }
+
+            // Derive the number of states for each feature: max value + 1.
+            states_.assign(nFeatures_, 0);
+            for (int f = 0; f < nFeatures_; f++) {
+                if (X[f].size() != y.size()) {
+                    throw std::invalid_argument("XSpode::fit: every feature row must have y.size() values.");
+                }
+                const auto featureRange = std::minmax_element(X[f].begin(), X[f].end());
+                if (*featureRange.first < 0) {
+                    throw std::invalid_argument("XSpode::fit: feature values must be non-negative.");
+                }
+                states_[f] = *featureRange.second + 1;
+            }
+            // For the class: statesClass_ = max(y) + 1
+            const auto classRange = std::minmax_element(y.begin(), y.end());
+            if (*classRange.first < 0) {
+                throw std::invalid_argument("XSpode::fit: class values must be non-negative.");
+            }
+            statesClass_ = *classRange.second + 1;
+
+            // Initialize counts. assign() (not resize()) so that counts from a
+            // previous fit are zeroed instead of silently accumulated.
+            classCounts_.assign(statesClass_, 0.0);
+            // p(x_sp = spVal | c)
+            // We'll store these counts in spFeatureCounts_[spVal * statesClass_ + c].
+            spFeatureCounts_.assign(states_[superParent_] * statesClass_, 0.0);
+
+            // For each child ≠ sp, we store p(childVal| c, spVal) in a separate block of childCounts_.
+            // childCounts_ will be sized as sum_{child≠sp} (states_[child] * statesClass_ * states_[sp]).
+            // We also need an offset for each child to index into childCounts_.
+            childOffsets_.assign(nFeatures_, -1);
+            int totalSize = 0;
+            for (int f = 0; f < nFeatures_; f++) {
+                if (f == superParent_) continue; // skip sp
+                childOffsets_[f] = totalSize;
+                // block size for this child's counts: states_[f] * statesClass_ * states_[superParent_]
+                totalSize += (states_[f] * statesClass_ * states_[superParent_]);
+            }
+            childCounts_.assign(totalSize, 0.0);
+
+            // Switch to COUNTS mode
+            matrixState_ = MatrixState::COUNTS;
+
+            // Accumulate raw counts. The scratch instance (features followed by
+            // the class) is allocated once and overwritten for every sample.
+            std::vector<int> instance(nFeatures_ + 1);
+            for (int n = 0; n < numInstances; n++) {
+                for (int f = 0; f < nFeatures_; f++) {
+                    instance[f] = X[f][n];
+                }
+                instance[nFeatures_] = y[n];
+                addSample(instance, weights[n].item<double>());
+            }
+
+            // Laplace smoothing scaled to #instances
+            alpha_ = 1.0 / static_cast<double>(numInstances);
+            // Scale factor applied to the posterior product in predict_proba for
+            // numerical stability; computed in double so that
+            // nFeatures_ * nFeatures_ cannot overflow int.
+            initializer_ = std::numeric_limits<double>::max()
+                / (static_cast<double>(nFeatures_) * static_cast<double>(nFeatures_));
+            // Convert raw counts to probabilities
+            computeProbabilities();
+        }
+
+        // --------------------------------------
+        // addSample (only valid in COUNTS mode)
+        // --------------------------------------
+        //
+        // instance has size nFeatures_ + 1, with the class at the end.
+        // We add `weight` to the appropriate counters for each (c, superParentVal, childVal).
+        //
+        // Each counter is incremented by `weight`, not by 1. Samples whose
+        // weight is not strictly positive (including NaN) are ignored.
+        //
+        // Throws std::logic_error when the model is not in COUNTS mode and
+        // std::invalid_argument when instance does not hold nFeatures_ + 1 values.
+        //
+        void addSample(const std::vector<int>& instance, double weight)
+        {
+            if (matrixState_ != MatrixState::COUNTS) {
+                throw std::logic_error("addSample: Not in COUNTS mode!");
+            }
+            if (static_cast<int>(instance.size()) != nFeatures_ + 1) {
+                throw std::invalid_argument("addSample: instance.size() must be nFeatures_ + 1.");
+            }
+            // Written as !(weight > 0) so that a NaN weight is rejected too;
+            // `weight <= 0.0` is false for NaN and would let it poison the counts.
+            if (!(weight > 0.0)) return;
+
+            const int c = instance.back();
+            // (A) increment classCounts
+            classCounts_[c] += weight;
+
+            // (B) increment super-parent counts => p(x_sp | c)
+            const int spVal = instance[superParent_];
+            spFeatureCounts_[spVal * statesClass_ + c] += weight;
+
+            // (C) increment child counts => p(childVal | c, x_sp)
+            for (int f = 0; f < nFeatures_; f++) {
+                if (f == superParent_) continue;
+                const int childVal = instance[f];
+                // Layout of each child's block inside childCounts_:
+                //   [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
+                const int blockSize = states_[f] * statesClass_;
+                const int idx = childOffsets_[f] + spVal * blockSize + childVal * statesClass_ + c;
+                childCounts_[idx] += weight;
+            }
+        }
+
+        // --------------------------------------
+        // computeProbabilities
+        // --------------------------------------
+        //
+        // Once all samples are added in COUNTS mode, call this to compute:
+        //  p(c)
+        //  p(x_sp = spVal | c)
+        //  p(x_child = v | c, x_sp = s_sp)
+        //
+        // We store them in the corresponding *Probs_ arrays for inference.
+        // --------------------------------------
+        void computeProbabilities()
+        {
+            if (matrixState_ != MatrixState::COUNTS) {
+                throw std::logic_error("computeProbabilities: must be in COUNTS mode.");
+            }
+
+            const double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
+
+            // p(c) => classPriors_
+            classPriors_.assign(statesClass_, 0.0);
+            if (totalCount <= 0.0) {
+                // fallback => uniform (no sample carried a positive weight)
+                const double unif = 1.0 / static_cast<double>(statesClass_);
+                for (int c = 0; c < statesClass_; c++) {
+                    classPriors_[c] = unif;
+                }
+            } else {
+                for (int c = 0; c < statesClass_; c++) {
+                    classPriors_[c] = (classCounts_[c] + alpha_)
+                        / (totalCount + alpha_ * statesClass_);
+                }
+            }
+
+            // p(x_sp | c)
+            spFeatureProbs_.resize(spFeatureCounts_.size());
+            // denominator for spVal * statesClass_ + c is just classCounts_[c] + alpha_ * (#states of sp)
+            const int spCard = states_[superParent_];
+            for (int spVal = 0; spVal < spCard; spVal++) {
+                for (int c = 0; c < statesClass_; c++) {
+                    const double denom = classCounts_[c] + alpha_ * spCard;
+                    const double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
+                    spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
+                }
+            }
+
+            // p(x_child | c, x_sp)
+            childProbs_.resize(childCounts_.size());
+            for (int f = 0; f < nFeatures_; f++) {
+                if (f == superParent_) continue;
+                const int offset = childOffsets_[f];
+                const int childCard = states_[f];
+
+                // For each spVal, c, childVal in childCounts_:
+                for (int spVal = 0; spVal < spCard; spVal++) {
+                    for (int childVal = 0; childVal < childCard; childVal++) {
+                        for (int c = 0; c < statesClass_; c++) {
+                            const int idx = offset + spVal * (childCard * statesClass_)
+                                + childVal * statesClass_
+                                + c;
+
+                            const double num = childCounts_[idx] + alpha_;
+                            // denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * (#states of child)
+                            const double denom = spFeatureCounts_[spVal * statesClass_ + c]
+                                + alpha_ * childCard;
+                            childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
+                        }
+                    }
+                }
+            }
+
+            matrixState_ = MatrixState::PROBS;
+        }
+
+        // --------------------------------------
+        // predict_proba
+        // --------------------------------------
+        //
+        // For a single instance x of dimension nFeatures_:
+        //  P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
+        //
+        // Then we normalize the result.
+        //
+        // Only the first nFeatures_ entries of instance are read, so a trailing
+        // class value is tolerated.
+        //
+        // Throws std::logic_error when the model is not in PROBS mode,
+        // std::invalid_argument when instance holds fewer than nFeatures_
+        // values, and std::out_of_range when a value falls outside the states
+        // seen by fit (the probability tables have no entry for it).
+        // --------------------------------------
+        std::vector<double> predict_proba(const std::vector<int>& instance) const
+        {
+            if (matrixState_ != MatrixState::PROBS) {
+                throw std::logic_error("predict_proba: the model is not in PROBS mode.");
+            }
+            if (static_cast<int>(instance.size()) < nFeatures_) {
+                throw std::invalid_argument("predict_proba: instance has fewer than nFeatures_ values.");
+            }
+
+            std::vector<double> probs(statesClass_, 0.0);
+
+            // Multiply p(c) × p(x_sp | c)
+            const int spVal = instance[superParent_];
+            if (spVal < 0 || spVal >= states_[superParent_]) {
+                throw std::out_of_range("predict_proba: super-parent value not seen during fit.");
+            }
+            for (int c = 0; c < statesClass_; c++) {
+                const double pc = classPriors_[c];
+                const double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
+                // initializer_ rescales the product for numerical stability;
+                // it cancels out in the normalization below.
+                probs[c] = pc * pSpC * initializer_;
+            }
+
+            // Multiply by each child’s probability p(x_child | c, x_sp)
+            for (int feature = 0; feature < nFeatures_; feature++) {
+                if (feature == superParent_) continue; // skip sp
+                const int sf = instance[feature];
+                const int childCard = states_[feature];
+                if (sf < 0 || sf >= childCard) {
+                    throw std::out_of_range("predict_proba: feature value not seen during fit.");
+                }
+                // Index into childProbs_ = offset + spVal*(childCard*statesClass_) + childVal*statesClass_ + c
+                const int base = childOffsets_[feature] + spVal * (childCard * statesClass_) + sf * statesClass_;
+                for (int c = 0; c < statesClass_; c++) {
+                    probs[c] *= childProbs_[base + c];
+                }
+            }
+
+            // Normalize
+            normalize(probs);
+            return probs;
+        }
+
+        // --------------------------------------
+        // predict
+        // --------------------------------------
+        //
+        // Return the class argmax( P(c|x) ).
+        // --------------------------------------
+        int predict(const std::vector<int>& instance) const
+        {
+            const auto p = predict_proba(instance);
+            return static_cast<int>(std::distance(p.begin(),
+                std::max_element(p.begin(), p.end())));
+        }
+
+        // --------------------------------------
+        // predict (batch)
+        // --------------------------------------
+        //
+        // X uses the same feature-major layout as fit: X[f][n] is the value of
+        // feature f for instance n. Returns one predicted class per instance n.
+        // An empty X yields an empty result.
+        //
+        // Throws std::invalid_argument when X does not hold nFeatures_ rows or
+        // when its rows differ in length.
+        // --------------------------------------
+        std::vector<int> predict(const std::vector<std::vector<int>>& X) const
+        {
+            std::vector<int> preds;
+            if (X.empty()) {
+                return preds;
+            }
+            if (static_cast<int>(X.size()) != nFeatures_) {
+                throw std::invalid_argument("predict: X must have one row per feature.");
+            }
+            const std::size_t numInstances = X.front().size();
+            for (const auto& featureRow : X) {
+                if (featureRow.size() != numInstances) {
+                    throw std::invalid_argument("predict: every feature row must have the same length.");
+                }
+            }
+            preds.reserve(numInstances);
+            // Gather column n of X into a single instance, as fit does.
+            std::vector<int> instance(X.size());
+            for (std::size_t n = 0; n < numInstances; n++) {
+                for (std::size_t f = 0; f < X.size(); f++) {
+                    instance[f] = X[f][n];
+                }
+                preds.push_back(predict(instance));
+            }
+            return preds;
+        }
+
+        // --------------------------------------
+        // Utility: normalize
+        // --------------------------------------
+        //
+        // Scales v in place so that its entries sum to 1. A vector whose sum is
+        // not positive is left untouched.
+        // --------------------------------------
+        void normalize(std::vector<double>& v) const
+        {
+            double sum = 0.0;
+            for (auto val : v) { sum += val; }
+            if (sum <= 0.0) {
+                return;
+            }
+            for (auto& val : v) {
+                val /= sum;
+            }
+        }
+
+        // --------------------------------------
+        // debug printing, if desired
+        // --------------------------------------
+        //
+        // Returns a multi-line dump of the scalar settings and of every count,
+        // probability and offset table.
+        // --------------------------------------
+        std::string to_string() const
+        {
+            std::ostringstream oss;
+            oss << "---- SPODE Model ----\n"
+                << "nFeatures_ = " << nFeatures_ << "\n"
+                << "superParent_ = " << superParent_ << "\n"
+                << "statesClass_ = " << statesClass_ << "\n"
+                << "matrixState_ = "
+                << (matrixState_ == MatrixState::EMPTY ? "EMPTY"
+                    : (matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS"))
+                << "\n";
+
+            oss << "States: [";
+            for (int s : states_) oss << s << " ";
+            oss << "]\n";
+
+            oss << "classCounts_: [";
+            for (double c : classCounts_) oss << c << " ";
+            oss << "]\n";
+
+            oss << "classPriors_: [";
+            for (double c : classPriors_) oss << c << " ";
+            oss << "]\n";
+
+            oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << "\n[";
+            for (double c : spFeatureCounts_) oss << c << " ";
+            oss << "]\n";
+
+            oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << "\n[";
+            for (double c : spFeatureProbs_) oss << c << " ";
+            oss << "]\n";
+
+            oss << "childCounts_: size = " << childCounts_.size() << "\n[";
+            for (double cc : childCounts_) oss << cc << " ";
+            oss << "]\n";
+
+            oss << "childProbs_: size = " << childProbs_.size() << "\n[";
+            for (double cp : childProbs_) oss << cp << " ";
+            oss << "]\n";
+
+            oss << "childOffsets_: [";
+            for (int co : childOffsets_) oss << co << " ";
+            oss << "]\n";
+
+            oss << "---------------------\n";
+            return oss.str();
+        }
+
+    private:
+        // --------------------------------------
+        // MEMBERS
+        // --------------------------------------
+
+        int superParent_; // which feature is the single super-parent
+        int nFeatures_;
+        int statesClass_;
+        std::vector<int> states_; // [states_feat0, ..., states_feat(N-1)] (class not included in this array)
+
+        // Class counts
+        std::vector<double> classCounts_; // [c], accumulative
+        std::vector<double> classPriors_; // [c], after normalization
+
+        // For p(x_sp = spVal | c)
+        std::vector<double> spFeatureCounts_; // [spVal * statesClass_ + c]
+        std::vector<double> spFeatureProbs_; // same shape, after normalization
+
+        // For p(x_child = childVal | x_sp = spVal, c)
+        // childCounts_ is big enough to hold all child features except sp:
+        // For each child f, we store childOffsets_[f] as the start index, then
+        // index [ (spVal * states_[f] + childVal) * statesClass_ + c ] => the data.
+ std::vector childCounts_; + std::vector childProbs_; + std::vector childOffsets_; + + MatrixState matrixState_; + double alpha_ = 1.0; + double initializer_; // for numerical stability + }; + +} // namespace platform + +#endif // XSPODE_H diff --git a/src/experimental_clfs/Xaode2.hpp b/src/experimental_clfs/Xaode2.hpp index dd5f15d..520c8e7 100644 --- a/src/experimental_clfs/Xaode2.hpp +++ b/src/experimental_clfs/Xaode2.hpp @@ -9,14 +9,16 @@ #ifndef XAODE2_H #define XAODE2_H #include +#include #include #include #include -#include #include #include #include -#include +#include + +#include namespace platform { class Xaode2 { @@ -108,32 +110,39 @@ namespace platform { instance[nFeatures_] = y[n_instance]; addSample(instance, weights[n_instance].item()); } - //alpha_ = 1 / num_instances; + // alpha_ Laplace smoothing adapted to the number of instances + alpha_ = 1.0 / static_cast(num_instances); initializer_ = std::numeric_limits::max() / (nFeatures_ * nFeatures_); computeProbabilities(); } - // Optional: print a quick summary - void show() const + std::string to_string() const { - std::cout << "-------- Xaode.show() --------" << std::endl + std::ostringstream ostream; + ostream << "-------- Xaode.status --------" << std::endl << "- nFeatures = " << nFeatures_ << std::endl << "- statesClass = " << statesClass_ << std::endl << "- matrixState = " << (matrixState_ == MatrixState::COUNTS ? 
"COUNTS" : "PROBS") << std::endl; - std::cout << "- states: size: " << states_.size() << std::endl; - for (int s : states_) std::cout << s << " "; std::cout << std::endl; - std::cout << "- classCounts: size: " << classCounts_.size() << std::endl; - for (double cc : classCounts_) std::cout << cc << " "; std::cout << std::endl; - std::cout << "- classFeatureCounts: size: " << classFeatureCounts_.size() << std::endl; - for (double cfc : classFeatureCounts_) std::cout << cfc << " "; std::cout << std::endl; - std::cout << "- classFeatureProbs: size: " << classFeatureProbs_.size() << std::endl; - for (double cfp : classFeatureProbs_) std::cout << cfp << " "; std::cout << std::endl; - std::cout << "- featureClassOffset: size: " << featureClassOffset_.size() << std::endl; - for (int f : featureClassOffset_) std::cout << f << " "; std::cout << std::endl; - std::cout << "- pairOffset_: size: " << pairOffset_.size() << std::endl; - for (int p : pairOffset_) std::cout << p << " "; std::cout << std::endl; - std::cout << "- data: size: " << data_.size() << std::endl; - for (double d : data_) std::cout << d << " "; std::cout << std::endl; - std::cout << "--------------------------------" << std::endl; + ostream << "- states: size: " << states_.size() << std::endl; + for (int s : states_) ostream << s << " "; ostream << std::endl; + ostream << "- classCounts: size: " << classCounts_.size() << std::endl; + for (double cc : classCounts_) ostream << cc << " "; ostream << std::endl; + ostream << "- classPriors: size: " << classPriors_.size() << std::endl; + for (double cp : classPriors_) ostream << cp << " "; ostream << std::endl; + ostream << "- classFeatureCounts: size: " << classFeatureCounts_.size() << std::endl; + for (double cfc : classFeatureCounts_) ostream << cfc << " "; ostream << std::endl; + ostream << "- classFeatureProbs: size: " << classFeatureProbs_.size() << std::endl; + for (double cfp : classFeatureProbs_) ostream << cfp << " "; ostream << std::endl; + ostream << "- 
featureClassOffset: size: " << featureClassOffset_.size() << std::endl; + for (int f : featureClassOffset_) ostream << f << " "; ostream << std::endl; + ostream << "- pairOffset_: size: " << pairOffset_.size() << std::endl; + for (int p : pairOffset_) ostream << p << " "; ostream << std::endl; + ostream << "- data: size: " << data_.size() << std::endl; + for (double d : data_) ostream << d << " "; ostream << std::endl; + ostream << "- dataOpp: size: " << dataOpp_.size() << std::endl; + for (double d : dataOpp_) ostream << d << " "; ostream << std::endl; + ostream << "--------------------------------" << std::endl; + std::string output = ostream.str(); + return output; } // ------------------------------------------------------- // addSample (only in COUNTS mode) @@ -148,18 +157,7 @@ namespace platform { // (B) increment feature–class counts => for p(x_i|c) // (C) increment pair (superparent= i, child= j) counts => data_ // - - // if (matrixState_ != MatrixState::COUNTS) { - // throw std::logic_error("addSample: not in COUNTS mode."); - // } - // if (static_cast(instance.size()) != nFeatures_ + 1) { - // throw std::invalid_argument("addSample: instance.size() must be nFeatures_ + 1."); - // } - int c = instance.back(); - // if (c < 0 || c >= statesClass_) { - // throw std::out_of_range("addSample: class index out of range."); - // } if (weight <= 0.0) { return; } @@ -168,17 +166,17 @@ namespace platform { // (B,C) // We'll store raw counts now and turn them into p(child| c, superparent) later. 
- int idx, fcIndex, si, sj, i_offset; - for (int i = 0; i < nFeatures_; ++i) { - si = instance[i]; + int idx, fcIndex, sp, sc, i_offset; + for (int parent = 0; parent < nFeatures_; ++parent) { + sp = instance[parent]; // (B) increment feature–class counts => for p(x_i|c) - fcIndex = (featureClassOffset_[i] + si) * statesClass_ + c; + fcIndex = (featureClassOffset_[parent] + sp) * statesClass_ + c; classFeatureCounts_[fcIndex] += weight; // (C) increment pair (superparent= i, child= j) counts => data_ - i_offset = pairOffset_[featureClassOffset_[i] + si]; - for (int j = 0; j < i; ++j) { - sj = instance[j]; - idx = (i_offset + featureClassOffset_[j] + sj) * statesClass_ + c; + i_offset = pairOffset_[featureClassOffset_[parent] + sp]; + for (int child = 0; child < parent; ++child) { + sc = instance[child]; + idx = (i_offset + featureClassOffset_[child] + sc) * statesClass_ + c; data_[idx] += weight; } } @@ -207,36 +205,26 @@ namespace platform { } } else { for (int c = 0; c < statesClass_; ++c) { - classPriors_[c] = classCounts_[c] / totalCount; + classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_); } } // (2) p(x_i=si | c) => classFeatureProbs_ int idx, sf; - double denom, countVal, p; + double denom; for (int feature = 0; feature < nFeatures_; ++feature) { sf = states_[feature]; for (int c = 0; c < statesClass_; ++c) { - denom = classCounts_[c] * sf; - if (denom <= 0.0) { - // fallback => uniform - for (int sf_value = 0; sf_value < sf; ++sf_value) { - idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c; - classFeatureProbs_[idx] = 1.0 / sf; - } - } else { - for (int sf_value = 0; sf_value < sf; ++sf_value) { - idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c; - countVal = classFeatureCounts_[idx]; - p = ((countVal + alpha_ / (statesClass_ * states_[feature])) / (totalCount + alpha_)); - classFeatureProbs_[idx] = p; - } + denom = classCounts_[c] + alpha_ * sf; + for (int sf_value = 0; sf_value < sf; 
++sf_value) { + idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c; + classFeatureProbs_[idx] = (classFeatureCounts_[idx] + alpha_) / denom; } } } // getCountFromTable(int classVal, int pIndex, int childIndex) // (3) p(x_c=sc | c, x_p=sp) => data_(parent,sp,child,sc,c) // (3) p(x_p=sp | c, x_c=sc) => dataOpp_(child,sc,parent,sp,c) - // C(x_c, x_p, c) + alpha_/Card(xp) + // C(x_c, x_p, c) + alpha_ // P(x_p | x_c, c) = ----------------------------------- // C(x_c, c) + alpha_ double pcc_count, pc_count, cc_count; @@ -260,10 +248,10 @@ namespace platform { // Child, Class count cc_count = classFeatureCounts_[part2_class + c]; // p(x_c=sc | c, x_p=sp) - conditionalProb = (pcc_count + alpha_ / states_[parent]) / (cc_count + alpha_); + conditionalProb = (pcc_count + alpha_) / (pc_count + alpha_ * states_[child]); data_[idx] = conditionalProb; // p(x_p=sp | c, x_c=sc) - oppositeCondProb = (pcc_count + alpha_ / states_[child]) / (pc_count + alpha_); + oppositeCondProb = (pcc_count + alpha_) / (cc_count + alpha_ * states_[parent]); dataOpp_[idx] = oppositeCondProb; } } @@ -288,7 +276,10 @@ namespace platform { { // accumulates posterior probabilities for each class auto probs = std::vector(statesClass_); - auto spodeProbs = std::vector(statesClass_); + auto spodeProbs = std::vector(statesClass_, 0.0); + if (std::find(active_parents.begin(), active_parents.end(), parent) == active_parents.end()) { + return spodeProbs; + } // Initialize the probabilities with the feature|class probabilities x class priors int localOffset; int sp = instance[parent]; @@ -297,21 +288,27 @@ namespace platform { spodeProbs[c] = classFeatureProbs_[localOffset + c] * classPriors_[c] * initializer_; } int idx, base, sc, parent_offset; - sp = instance[parent]; - parent_offset = pairOffset_[featureClassOffset_[parent] + sp]; for (int child = 0; child < nFeatures_; ++child) { if (child == parent) { continue; } sc = instance[child]; - base = (parent_offset + featureClassOffset_[child] + 
sc) * statesClass_; + if (child > parent) { + parent_offset = pairOffset_[featureClassOffset_[child] + sc]; + base = (parent_offset + featureClassOffset_[parent] + sp) * statesClass_; + } else { + parent_offset = pairOffset_[featureClassOffset_[parent] + sp]; + base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_; + } for (int c = 0; c < statesClass_; ++c) { /* * The probability P(xc|xp,c) is stored in dataOpp_, and * the probability P(xp|xc,c) is stored in data_ */ idx = base + c; - spodeProbs[c] *= child < parent ? dataOpp_[idx] : data_[idx]; + double factor = child > parent ? dataOpp_[idx] : data_[idx]; + // double factor = data_[idx]; + spodeProbs[c] *= factor; } } // Normalize the probabilities @@ -347,7 +344,7 @@ namespace platform { } localOffset = (featureClassOffset_[feature] + instance[feature]) * statesClass_; for (int c = 0; c < statesClass_; ++c) { - spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c]; + spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c] * initializer_; } } int idx, base, sp, sc, parent_offset; @@ -360,15 +357,23 @@ namespace platform { parent_offset = pairOffset_[featureClassOffset_[parent] + sp]; for (int child = 0; child < parent; ++child) { sc = instance[child]; - base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_; + if (child > parent) { + parent_offset = pairOffset_[featureClassOffset_[child] + sc]; + base = (parent_offset + featureClassOffset_[parent] + sp) * statesClass_; + } else { + parent_offset = pairOffset_[featureClassOffset_[parent] + sp]; + base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_; + } for (int c = 0; c < statesClass_; ++c) { /* * The probability P(xc|xp,c) is stored in dataOpp_, and * the probability P(xp|xc,c) is stored in data_ */ idx = base + c; - spodeProbs[child][c] *= data_[idx]; - spodeProbs[parent][c] *= dataOpp_[idx]; + double factor_child = child > parent ? 
data_[idx] : dataOpp_[idx]; + double factor_parent = child > parent ? dataOpp_[idx] : data_[idx]; + spodeProbs[child][c] *= factor_child; + spodeProbs[parent][c] *= factor_parent; } } } @@ -456,8 +461,8 @@ namespace platform { MatrixState matrixState_; - double alpha_ = 1.0; - double initializer_ = std::numeric_limits::max(); + double alpha_ = 1.0; // Laplace smoothing + double initializer_ = 1.0; std::vector active_parents; }; }