diff --git a/src/experimental_clfs/XBAODE.cpp b/src/experimental_clfs/XBAODE.cpp
index 5e77885..f940f61 100644
--- a/src/experimental_clfs/XBAODE.cpp
+++ b/src/experimental_clfs/XBAODE.cpp
@@ -76,9 +76,9 @@ namespace platform {
             std::shuffle(featureSelection.begin(), featureSelection.end(), g);
         }
         // Remove used features
-        featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
-            { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
-            end(featureSelection)
+        featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(), [&](auto x)
+            { return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end();}),
+            featureSelection.end()
         );
         int k = bisection ? pow(2, tolerance) : 1;
         int counter = 0; // The model counter of the current pack
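[note — not part of the patch] The hunk above only swaps the free begin()/end()
calls for the member functions; the erase-remove idiom itself is unchanged. A
minimal, self-contained sketch of that idiom, assuming plain std::vector<int>
containers (removeUsed is an illustrative name):

    #include <algorithm>
    #include <vector>

    // Drop every candidate that already appears in `used` (erase-remove idiom).
    void removeUsed(std::vector<int>& candidates, const std::vector<int>& used)
    {
        candidates.erase(
            std::remove_if(candidates.begin(), candidates.end(),
                           [&used](int x) {
                               return std::find(used.begin(), used.end(), x) != used.end();
                           }),
            candidates.end());
    }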
diff --git a/src/experimental_clfs/Xaode.hpp b/src/experimental_clfs/Xaode.hpp
index d24eadf..1126ebe 100644
--- a/src/experimental_clfs/Xaode.hpp
+++ b/src/experimental_clfs/Xaode.hpp
@@ -60,6 +60,8 @@ namespace platform {
             states_.push_back(*max_element(y.begin(), y.end()) + 1);
             //
             statesClass_ = states_.back();
+            classCounts_.resize(statesClass_, 0.0);
+            classPriors_.resize(statesClass_, 0.0);
             //
             // Initialize data structures
             //
@@ -94,9 +96,6 @@ namespace platform {
             classFeatureCounts_.resize(feature_offset * statesClass_);
             classFeatureProbs_.resize(feature_offset * statesClass_);
-
-            // classCounts_[c]
-            classCounts_.resize(statesClass_, 0.0);
-
             matrixState_ = MatrixState::COUNTS;
             //
             // Add samples
@@ -187,8 +186,9 @@ namespace platform {
         // -------------------------------------------------------
         //
         // Once all samples are added in COUNTS mode, call this to:
-        //  1) compute p(x_i=si | c)         => classFeatureProbs_
-        //  2) compute p(x_j=sj | c, x_i=si) => data_ (for i < j)
+        //  1) compute p(c)                  => classPriors_
+        //  2) compute p(x_i=si | c)         => classFeatureProbs_
+        //  3) compute p(x_j=sj | c, x_i=si) => data_ (for i < j)
         //
         void computeProbabilities()
         {
@@ -196,32 +196,67 @@ namespace platform {
                 throw std::logic_error("computeProbabilities: must be in COUNTS mode.");
             }
             double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
-            // (1) p(x_i=si | c) => classFeatureProbs_
+            // (1) p(c)
+            if (totalCount <= 0.0) {
+                // fallback => uniform
+                double unif = 1.0 / statesClass_;
+                for (int c = 0; c < statesClass_; ++c) {
+                    classPriors_[c] = unif;
+                }
+            } else {
+                for (int c = 0; c < statesClass_; ++c) {
+                    classPriors_[c] = classCounts_[c] / totalCount;
+                }
+            }
+            // (2) p(x_i=si | c) => classFeatureProbs_
             int idx, sf;
             double denom, countVal, p;
+            // for (int feature = 0; feature < nFeatures_; ++feature) {
+            //     sf = states_[feature];
+            //     for (int c = 0; c < statesClass_; ++c) {
+            //         denom = classCounts_[c] * sf;
+            //         if (denom <= 0.0) {
+            //             // fallback => uniform
+            //             for (int sf_value = 0; sf_value < sf; ++sf_value) {
+            //                 idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+            //                 classFeatureProbs_[idx] = 1.0 / sf;
+            //             }
+            //         } else {
+            //             for (int sf_value = 0; sf_value < sf; ++sf_value) {
+            //                 idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+            //                 countVal = classFeatureCounts_[idx];
+            //                 p = ((countVal + SMOOTHING / (statesClass_ * states_[feature])) / (totalCount + SMOOTHING));
+            //                 classFeatureProbs_[idx] = p;
+            //             }
+            //         }
+            //     }
+            // }
+            double alpha = SMOOTHING;
             for (int feature = 0; feature < nFeatures_; ++feature) {
-                sf = states_[feature];
+                int sf = states_[feature];
                 for (int c = 0; c < statesClass_; ++c) {
-                    denom = classCounts_[c] * sf;
-                    if (denom <= 0.0) {
+                    double denom = classCounts_[c] + alpha * sf; // typical Laplace smoothing denominator
+                    if (classCounts_[c] <= 0.0) {
                         // fallback => uniform
                         for (int sf_value = 0; sf_value < sf; ++sf_value) {
-                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+                            int idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
                             classFeatureProbs_[idx] = 1.0 / sf;
                         }
                     } else {
                         for (int sf_value = 0; sf_value < sf; ++sf_value) {
-                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
-                            countVal = classFeatureCounts_[idx];
-                            p = ((countVal + SMOOTHING / (statesClass_ * states_[feature])) / (totalCount + SMOOTHING));
+                            int idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+                            double countVal = classFeatureCounts_[idx];
+                            // standard NB with Laplace alpha
+                            double p = (countVal + alpha) / denom;
                             classFeatureProbs_[idx] = p;
                         }
                     }
                 }
             }
+            // getCountFromTable(int classVal, int pIndex, int childIndex)
-            // (2) p(x_j=sj | c, x_i=si) => data_(i,si,j,sj,c)
-            // (2) p(x_i=si | c, x_j=sj) => dataOpp_(j,sj,i,si,c)
+            // (3) p(x_j=sj | c, x_i=si) => data_(i,si,j,sj,c)
+            // (3) p(x_i=si | c, x_j=sj) => dataOpp_(j,sj,i,si,c)
             double pccCount, pcCount, ccCount;
             double conditionalProb, oppositeCondProb;
             int part1, part2, p1, part2_class, p1_class;
@@ -231,13 +266,15 @@ namespace platform {
                     p1 = featureClassOffset_[parent] + sp;
                     part1 = pairOffset_[p1];
                     p1_class = p1 * statesClass_;
+
+                    // int parentStates = states_[parent];
+
                     for (int child = parent - 1; child >= 0; --child) {
                         // for (int child = 2; child >= 2; --child) {
                         for (int sc = 0; sc < states_[child]; ++sc) {
                             part2 = featureClassOffset_[child] + sc;
                             part2_class = part2 * statesClass_;
                             for (int c = 0; c < statesClass_; c++) {
-                                //idx = compute_index(parent, sp, child, sc, classval);
                                 idx = (part1 + part2) * statesClass_ + c;
                                 // Parent, Child, Class Count
                                 pccCount = data_[idx];
@@ -246,8 +283,19 @@ namespace platform {
                                 // Child, Class count
                                 ccCount = classFeatureCounts_[part2_class + c];
                                 conditionalProb = (pccCount + SMOOTHING / states_[parent]) / (ccCount + SMOOTHING);
+
+                                // pcCount = classFeatureCounts_[(featureClassOffset_[parent] + sp) * statesClass_ + c];
+                                // // This is the "parent, class" count
+                                // int childStates = states_[child];
+                                // conditionalProb = (pccCount + alpha) / (pcCount + alpha * childStates);
                                 data_[idx] = conditionalProb;
+
+
+
                                 oppositeCondProb = (pccCount + SMOOTHING / states_[child]) / (pcCount + SMOOTHING);
+
+                                // ccCount = classFeatureCounts_[(featureClassOffset_[child] + sc) * statesClass_ + c];
+                                // oppositeCondProb = (pccCount + alpha) / (ccCount + alpha * parentStates);
                                 dataOpp_[idx] = oppositeCondProb;
                             }
                         }
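[note — not part of the patch] The rewritten loop above replaces the previous
totalCount-based denominator with the usual per-class Laplace estimate
p(x_i = s | c) = (N_{c,s} + alpha) / (N_c + alpha * S_i), falling back to a
uniform distribution when a class has no samples. A self-contained sketch of
that estimate, with illustrative names (smoothedConditional is not part of the
class):

    #include <vector>

    // Laplace-smoothed p(x = s | c) for one feature/class pair.
    // counts[s] holds N_{c,s}; classCount is N_c; alpha is the smoothing strength.
    std::vector<double> smoothedConditional(const std::vector<double>& counts,
                                            double classCount, double alpha)
    {
        const int S = static_cast<int>(counts.size());
        std::vector<double> probs(S);
        if (classCount <= 0.0) {
            for (int s = 0; s < S; ++s) probs[s] = 1.0 / S;   // uniform fallback
        } else {
            const double denom = classCount + alpha * S;      // Laplace denominator
            for (int s = 0; s < S; ++s) probs[s] = (counts[s] + alpha) / denom;
        }
        return probs;
    }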
@@ -268,50 +316,55 @@ namespace platform {
         // We multiply p(c) * p(x_sp| c) * p(x_i| c, x_sp).
         // Then normalize the distribution.
         //
-        std::vector<double> predict_proba_spode(const std::vector<int>& instance, int parent)
-        {
-            // accumulates posterior probabilities for each class
-            auto probs = std::vector<double>(statesClass_);
-            auto spodeProbs = std::vector<double>(statesClass_);
-            // Initialize the probabilities with the feature|class probabilities
-            int localOffset;
-            int sp = instance[parent];
-            localOffset = (featureClassOffset_[parent] + sp) * statesClass_;
-            for (int c = 0; c < statesClass_; ++c) {
-                spodeProbs[c] = classFeatureProbs_[localOffset + c];
-            }
-            int idx, base, sc, parent_offset;
-            sp = instance[parent];
-            parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
-            for (int child = 0; child < parent; ++child) {
-                sc = instance[child];
-                base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
-                for (int c = 0; c < statesClass_; ++c) {
-                    /*
-                     * The probability P(xc|xp,c) is stored in dataOpp_, and
-                     * the probability P(xp|xc,c) is stored in data_
-                     */
-                    /*
-                    int base = pairOffset_[i * nFeatures_ + j];
-                    int blockSize = states_[i] * states_[j];
-                    return base + c * blockSize + (si * states_[j] + sj);
-                    */
-                    // index = compute_index(parent, instance[parent], child, instance[child], classVal);
-                    idx = base + c;
-                    spodeProbs[c] *= data_[idx];
-                    spodeProbs[c] *= dataOpp_[idx];
-                }
-            }
-            // Normalize the probabilities
-            normalize(spodeProbs);
-            return spodeProbs;
-        }
+        // std::vector<double> predict_proba_spode(const std::vector<int>& instance, int parent)
+        // {
+        //     // accumulates posterior probabilities for each class
+        //     auto probs = std::vector<double>(statesClass_);
+        //     auto spodeProbs = std::vector<double>(statesClass_);
+        //     // Initialize the probabilities with the feature|class probabilities
+        //     int localOffset;
+        //     int sp = instance[parent];
+        //     localOffset = (featureClassOffset_[parent] + sp) * statesClass_;
+        //     for (int c = 0; c < statesClass_; ++c) {
+        //         spodeProbs[c] = classFeatureProbs_[localOffset + c] * classPriors_[c];
+        //     }
+        //     int idx, base, sc, parent_offset;
+        //     sp = instance[parent];
+        //     parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
+        //     for (int child = 0; child < parent; ++child) {
+        //         sc = instance[child];
+        //         base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
+        //         for (int c = 0; c < statesClass_; ++c) {
+        //             /*
+        //              * The probability P(xc|xp,c) is stored in dataOpp_, and
+        //              * the probability P(xp|xc,c) is stored in data_
+        //              */
+        //             idx = base + c;
+        //             spodeProbs[c] *= data_[idx];
+        //             spodeProbs[c] *= dataOpp_[idx];
+        //         }
+        //     }
+        //     // Normalize the probabilities
+        //     normalize(spodeProbs);
+        //     return spodeProbs;
+        // }
         int predict_spode(const std::vector<int>& instance, int parent)
         {
-            auto probs = predict_proba_spode(instance, parent);
+            auto probs = predict_proba(instance, parent);
             return (int)std::distance(probs.begin(), std::max_element(probs.begin(), probs.end()));
         }
-        std::vector<double> predict_proba(const std::vector<int>& instance)
+        // -------------------------------------------------------
+        // predict_proba
+        // -------------------------------------------------------
+        //
+        // P(c | x) ∝ p(c) * ∏_{i} p(x_i | c) * ∏_{i<j} p(x_j | c, x_i)
+        //
+        std::vector<double> predict_proba(const std::vector<int>& instance, int spode = -1)
         {
             // accumulates posterior probabilities for each class
             auto probs = std::vector<double>(statesClass_);
@@ -325,7 +378,7 @@ namespace platform {
                 }
                 localOffset = (featureClassOffset_[feature] + instance[feature]) * statesClass_;
                 for (int c = 0; c < statesClass_; ++c) {
-                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c];
+                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c];
                 }
             }
             int idx, base, sp, sc, parent_offset;
@@ -344,24 +397,21 @@ namespace platform {
                          * The probability P(xc|xp,c) is stored in dataOpp_, and
                          * the probability P(xp|xc,c) is stored in data_
                          */
-                        /*
-                        int base = pairOffset_[i * nFeatures_ + j];
-                        int blockSize = states_[i] * states_[j];
-                        return base + c * blockSize + (si * states_[j] + sj);
-                        */
-                        // index = compute_index(parent, instance[parent], child, instance[child], classVal);
                         idx = base + c;
                         spodeProbs[child][c] *= data_[idx];
-                        // spodeProbs[child][c] *= data_.at(index);
                         spodeProbs[parent][c] *= dataOpp_[idx];
-                        // spodeProbs[parent][c] *= dataOpp_.at(index);
                     }
                 }
             }
+            if (spode != -1) {
+                // no need to use significance_models_ if we are predicting with a single spode
+                normalize(spodeProbs[spode]);
+                return spodeProbs[spode];
+            }
             /* add all the probabilities for each class */
             for (int c = 0; c < statesClass_; ++c) {
                 for (int i = 0; i < nFeatures_; ++i) {
-                    probs[c] += spodeProbs[i][c];
+                    probs[c] += spodeProbs[i][c] * significance_models_[i];
                }
             }
             // Normalize the probabilities
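[note — not part of the patch] predict_proba now scales each SPODE's posterior
by significance_models_[i] before summing, and the new spode argument skips
those weights when a single SPODE is requested. A standalone sketch of the
weighted combination step (combineSpodes and its parameters are illustrative):

    #include <cstddef>
    #include <vector>

    // Sum per-SPODE class posteriors, weighting each SPODE, then renormalize.
    std::vector<double> combineSpodes(const std::vector<std::vector<double>>& spodeProbs,
                                      const std::vector<double>& weights)
    {
        const int nClasses = static_cast<int>(spodeProbs.front().size());
        std::vector<double> probs(nClasses, 0.0);
        for (std::size_t i = 0; i < spodeProbs.size(); ++i)
            for (int c = 0; c < nClasses; ++c)
                probs[c] += spodeProbs[i][c] * weights[i];   // weighted vote per SPODE
        double total = 0.0;
        for (double v : probs) total += v;
        if (total > 0.0)
            for (double& v : probs) v /= total;              // normalize to a distribution
        return probs;
    }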
@@ -433,6 +483,7 @@ namespace platform {
         // classCounts_[c]
         std::vector<double> classCounts_;
+        std::vector<double> classPriors_; // => p(c)
         // For p(x_i=si | c), we store counts in classFeatureCounts_ => offset by featureClassOffset_[i]
         std::vector<int> featureClassOffset_;
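[note — not part of the patch] With the optional spode parameter, one entry
point now serves both the ensemble and a single model. A usage sketch, assuming
a fitted platform::Xaode named model and a discretized instance (both names are
illustrative):

    std::vector<int> instance{ 0, 1, 2 };             // one discretized value per feature
    auto ensemble = model.predict_proba(instance);    // full posterior, weighted by significance_models_
    auto single = model.predict_proba(instance, 2);   // SPODE with parent feature 2, unweighted
    int label = model.predict_spode(instance, 2);     // argmax over that single-SPODE posterior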