@@ -3,420 +3,449 @@
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <thread>
#include "XSPODE.h"
#include "bayesnet/utils/TensorUtils.h"
namespace bayesnet {
    // --------------------------------------
    // Constructor
    // --------------------------------------
    XSpode::XSpode(int spIndex)
        : Classifier(Network()), superParent_{ spIndex }, nFeatures_{ 0 },
        statesClass_{ 0 }, alpha_{ 1.0 }, initializer_{ 1.0 },
        semaphore_{ CountingSemaphore::getInstance() }
    {
        validHyperparameters = { "parent" };
    }
    void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("parent")) {
            superParent_ = hyperparameters["parent"];
            hyperparameters.erase("parent");
        }
        Classifier::setHyperparameters(hyperparameters);
    }
    void XSpode::fit(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
    {
        m = X.size(1);
        n = X.size(0);
        dataset = X;
        buildDataset(y);
        buildModel(weights_);
        trainModel(weights_, smoothing);
        fitted = true;
    }
    void XSpode::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, torch::Tensor& weights_, const Smoothing_t smoothing)
    {
        m = X[0].size();
        n = X.size();
        buildModel(weights_);
        trainModel(weights_, smoothing);
        fitted = true;
    }
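    // Data layout note (inferred from the fit overloads above): samples are
    // stored column-wise, i.e. the data has one row per feature (n rows) and
    // one column per sample (m columns), and dataset's last row holds the
    // class labels, so dataset[f][i] is the value of feature f for sample i.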
    // --------------------------------------
    // buildModel
    // --------------------------------------
    // Initialize storage needed for the super-parent and child features counts
    // and probs.
    // --------------------------------------
    void XSpode::buildModel(const torch::Tensor& weights)
    {
        int numInstances = m;
        nFeatures_ = n;
        // Derive the number of states for each feature and for the class.
        // (This is just one approach; adapt to match your environment.)
        // Here, we assume the user also gave us the total #states per feature
        // in e.g. statesMap. We'll simply reconstruct the integer states_
        // array. The last entry is statesClass_.
        states_.resize(nFeatures_);
        for (int f = 0; f < nFeatures_; f++) {
            // Suppose you look up in "statesMap" by the feature name, or read
            // directly from X. We'll assume states_[f] = max value in X[f] + 1.
            states_[f] = dataset[f].max().item<int>() + 1;
        }
        // For the class: states_.back() = max(y) + 1
        statesClass_ = dataset[-1].max().item<int>() + 1;
        // Initialize counts
        classCounts_.resize(statesClass_, 0.0);
        // p(x_sp = spVal | c) is stored in spFeatureCounts_[spVal * statesClass_ + c].
        spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);
        // For each child != sp, we store p(childVal | c, spVal) in a separate
        // block of childCounts_. childCounts_ is sized as
        // sum_{child != sp} (states_[child] * statesClass_ * states_[sp]).
        // We also need an offset for each child to index into childCounts_.
        childOffsets_.resize(nFeatures_, -1);
        int totalSize = 0;
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent_)
                continue; // skip sp
            childOffsets_[f] = totalSize;
            // block size for this child's counts:
            // states_[f] * statesClass_ * states_[superParent_]
            totalSize += (states_[f] * statesClass_ * states_[superParent_]);
        }
        childCounts_.resize(totalSize, 0.0);
    }
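    // Layout example (hypothetical numbers, for illustration only): with
    // nFeatures_ = 3, superParent_ = 1, states_ = {2, 3, 4} and statesClass_ = 2,
    // the child blocks are
    //   feature 0: offset 0,  size 2 * 2 * 3 = 12
    //   feature 2: offset 12, size 4 * 2 * 3 = 24
    // so childCounts_ holds 36 doubles and childOffsets_ = {0, -1, 12}.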
    // --------------------------------------
    // trainModel
    // --------------------------------------
    //
    // We only store conditional probabilities for:
    //   p(x_sp | c)          (the super-parent feature)
    //   p(x_child | c, x_sp) for all child != sp
    //
    // --------------------------------------
    void XSpode::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
    {
        // Accumulate raw counts
        for (int i = 0; i < m; i++) {
            std::vector<int> instance(nFeatures_ + 1);
            for (int f = 0; f < nFeatures_; f++) {
                instance[f] = dataset[f][i].item<int>();
            }
            instance[nFeatures_] = dataset[-1][i].item<int>();
            addSample(instance, weights[i].item<double>());
        }
        switch (smoothing) {
            case bayesnet::Smoothing_t::ORIGINAL:
                alpha_ = 1.0 / m;
                break;
            case bayesnet::Smoothing_t::LAPLACE:
                alpha_ = 1.0;
                break;
            default:
                alpha_ = 0.0; // No smoothing
        }
        initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_); // for numerical stability
        // Convert raw counts to probabilities
        computeProbabilities();
    }
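    // Smoothing example (numbers are illustrative): with m = 100 training
    // samples, ORIGINAL smoothing sets alpha_ = 1.0 / 100 = 0.01, LAPLACE sets
    // alpha_ = 1.0, and any other value disables smoothing. initializer_ scales
    // each per-class product once in predict_proba() so that multiplying many
    // small probabilities does not underflow to zero before normalization.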
    // --------------------------------------
    // addSample
    // --------------------------------------
    //
    // instance has size nFeatures_ + 1, with the class at the end.
    // We add the sample weight to the appropriate counters for each
    // (c, superParentVal, childVal).
    //
    void XSpode::addSample(const std::vector<int>& instance, double weight)
    {
        if (weight <= 0.0)
            return;
        int c = instance.back();
        // (A) increment classCounts
        classCounts_[c] += weight;
        // (B) increment super-parent counts => p(x_sp | c)
        int spVal = instance[superParent_];
        spFeatureCounts_[spVal * statesClass_ + c] += weight;
        // (C) increment child counts => p(childVal | c, x_sp)
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent_)
                continue;
            int childVal = instance[f];
            int offset = childOffsets_[f];
            // Compute index in childCounts_.
            // Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
            int blockSize = states_[f] * statesClass_;
            int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
            childCounts_[idx] += weight;
        }
    }
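    // Index example (continuing the hypothetical layout above): for an instance
    // {x0 = 1, x_sp = 2, x2 = 3} with class c = 0, addSample() increments
    // classCounts_[0], spFeatureCounts_[2 * 2 + 0] = spFeatureCounts_[4], and
    //   f = 0: idx = 0  + 2 * (2 * 2) + 1 * 2 + 0 = 10
    //   f = 2: idx = 12 + 2 * (4 * 2) + 3 * 2 + 0 = 34
    // in childCounts_.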
    // --------------------------------------
    // computeProbabilities
    // --------------------------------------
    //
    // Once all samples are added in COUNTS mode, call this to compute:
    //   p(c)
    //   p(x_sp = spVal | c)
    //   p(x_child = v | c, x_sp = s_sp)
    //
    // --------------------------------------
    void XSpode::computeProbabilities()
    {
        double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
        // p(c) => classPriors_
        classPriors_.resize(statesClass_, 0.0);
        if (totalCount <= 0.0) {
            // fallback => uniform
            double unif = 1.0 / static_cast<double>(statesClass_);
            for (int c = 0; c < statesClass_; c++) {
                classPriors_[c] = unif;
            }
        } else {
            for (int c = 0; c < statesClass_; c++) {
                classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
            }
        }
        // p(x_sp | c)
        spFeatureProbs_.resize(spFeatureCounts_.size());
        // denominator for spVal * statesClass_ + c is just
        // classCounts_[c] + alpha_ * (#states of sp)
        int spCard = states_[superParent_];
        for (int spVal = 0; spVal < spCard; spVal++) {
            for (int c = 0; c < statesClass_; c++) {
                double denom = classCounts_[c] + alpha_ * spCard;
                double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
                spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
            }
        }
        // p(x_child | c, x_sp)
        childProbs_.resize(childCounts_.size());
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent_)
                continue;
            int offset = childOffsets_[f];
            int childCard = states_[f];
            // For each spVal, childVal, c in childCounts_:
            for (int spVal = 0; spVal < spCard; spVal++) {
                for (int childVal = 0; childVal < childCard; childVal++) {
                    for (int c = 0; c < statesClass_; c++) {
                        int idx = offset + spVal * (childCard * statesClass_) + childVal * statesClass_ + c;
                        double num = childCounts_[idx] + alpha_;
                        // denominator = spFeatureCounts_[spVal * statesClass_ + c]
                        //             + alpha_ * (#states of child)
                        double denom = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
                        childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                    }
                }
            }
        }
    }
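    // Estimate example (illustrative): if class c has classCounts_[c] = 30,
    // alpha_ = 1.0 and the super-parent has spCard = 3 states, then a count
    // spFeatureCounts_[spVal * statesClass_ + c] = 10 yields
    //   p(x_sp = spVal | c) = (10 + 1) / (30 + 1 * 3) = 11 / 33 = 1/3.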
    // --------------------------------------
    // predict_proba
    // --------------------------------------
    //
    // For a single instance x of dimension nFeatures_:
    //   P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp)
    //
    // --------------------------------------
    std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
    {
        if (!fitted) {
            throw std::logic_error(CLASSIFIER_NOT_FITTED);
        }
        std::vector<double> probs(statesClass_, 0.0);
        // Multiply p(c) × p(x_sp | c)
        int spVal = instance[superParent_];
        for (int c = 0; c < statesClass_; c++) {
            double pc = classPriors_[c];
            double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
            probs[c] = pc * pSpC * initializer_;
        }
        // Multiply by each child's probability p(x_child | c, x_sp)
        for (int feature = 0; feature < nFeatures_; feature++) {
            if (feature == superParent_)
                continue; // skip sp
            int sf = instance[feature];
            int offset = childOffsets_[feature];
            int childCard = states_[feature];
            // Index into childProbs_:
            // offset + spVal * (childCard * statesClass_) + childVal * statesClass_ + c
            int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
            for (int c = 0; c < statesClass_; c++) {
                probs[c] *= childProbs_[base + c];
            }
        }
        // Normalize
        normalize(probs);
        return probs;
    }
    std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
    {
        int test_size = test_data[0].size();
        int sample_size = test_data.size();
        auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(statesClass_));
        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
        std::vector<std::thread> threads;
        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<std::vector<double>>& predictions) {
            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
            pthread_setname_np(pthread_self(), threadName.c_str());
#else
            pthread_setname_np(threadName.c_str());
#endif
            std::vector<int> instance(sample_size);
            for (int sample = begin; sample < begin + chunk; ++sample) {
                for (int feature = 0; feature < sample_size; ++feature) {
                    instance[feature] = samples[feature][sample];
                }
                predictions[sample] = predict_proba(instance);
            }
            semaphore_.release();
        };
        for (int begin = 0; begin < test_size; begin += chunk_size) {
            int chunk = std::min(chunk_size, test_size - begin);
            semaphore_.acquire();
            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
        }
        for (auto& thread : threads) {
            thread.join();
        }
        return probabilities;
    }
    // --------------------------------------
    // Utility: normalize
    // --------------------------------------
    void XSpode::normalize(std::vector<double>& v) const
    {
        double sum = 0.0;
        for (auto val : v) {
            sum += val;
        }
        if (sum <= 0.0) {
            return;
        }
        for (auto& val : v) {
            val /= sum;
        }
    }
    // --------------------------------------
    // representation of the model
    // --------------------------------------
    std::string XSpode::to_string() const
    {
        std::ostringstream oss;
        oss << "----- XSpode Model -----" << std::endl
            << "nFeatures_   = " << nFeatures_ << std::endl
            << "superParent_ = " << superParent_ << std::endl
            << "statesClass_ = " << statesClass_ << std::endl
            << std::endl;
oss < < " States: [ " ;
for ( int s : states_ ) oss < < s < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " classCounts_: [ " ;
for ( double c : classCounts_ ) oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " classPriors_: [ " ;
for ( double c : classPriors_ ) oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " spFeatureCounts_: size = " < < spFeatureCounts_ . size ( ) < < std : : endl < < " [ " ;
for ( double c : spFeatureCounts_ ) oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " spFeatureProb s_: size = " < < spFeatureProb s_ . size ( ) < < std : : endl < < " [ " ;
for ( double c : spFeatureProbs_ ) oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " childCounts_: size = " < < childCounts_ . size ( ) < < std : : endl < < " [ " ;
for ( double cc : childCounts_ ) oss < < cc < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " States: [ " ;
for ( int s : states_ )
oss < < s < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " classCounts_: [ " ;
for ( double c : classCounts_ )
oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " classPriors_: [ " ;
for ( double c : classPriors_ )
oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " spFeatureCount s_: size = " < < spFeatureCount s_ . size ( ) < < std : : endl
< < " [ " ;
for ( double c : spFeatureCounts_ )
oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " spFeatureProbs_: size = " < < spFeatureProbs_ . size ( ) < < std : : endl
< < " [ " ;
for ( double c : spFeatureProbs_ )
oss < < c < < " " ;
oss < < " ] " < < std : : endl ;
oss < < " childCounts_: size = " < < childCounts_ . size ( ) < < std : : endl < < " [ " ;
for ( double cc : childCounts_ )
oss < < cc < < " " ;
oss < < " ] " < < std : : endl ;
        oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
        for (double cp : childProbs_) oss << cp << " ";
        oss << "]" << std::endl;
        oss << "childOffsets_: [";
        for (int co : childOffsets_) oss << co << " ";
        oss << "]" << std::endl;
        oss << std::string(40, '-') << std::endl;
        return oss.str();
    }
    int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
    int XSpode::getClassNumStates() const { return statesClass_; }
    int XSpode::getNFeatures() const { return nFeatures_; }
    int XSpode::getNumberOfStates() const
    {
        return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
    }
    int XSpode::getNumberOfEdges() const
    {
        return nFeatures_ * (2 * nFeatures_ - 1);
    }
    std::vector<int>& XSpode::getStates() { return states_; }
    // ------------------------------------------------------
    // Predict overrides (classifier interface)
    // ------------------------------------------------------
    int XSpode::predict(const std::vector<int>& instance) const
    {
        auto p = predict_proba(instance);
        return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
    }
    std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
    {
        auto probabilities = predict_proba(test_data);
        std::vector<int> predictions(probabilities.size(), 0);
        for (size_t i = 0; i < probabilities.size(); i++) {
            predictions[i] = std::distance(probabilities[i].begin(), std::max_element(probabilities[i].begin(), probabilities[i].end()));
        }
        return predictions;
    }
    torch::Tensor XSpode::predict(torch::Tensor& X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto result_v = predict(X_);
        return torch::tensor(result_v, torch::kInt32);
    }
    torch::Tensor XSpode::predict_proba(torch::Tensor& X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto result_v = predict_proba(X_);
        int n_samples = X.size(1);
        torch::Tensor result = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
        for (int i = 0; i < static_cast<int>(result_v.size()); ++i) {
            result.index_put_({ i, "..." }, torch::tensor(result_v[i], torch::kDouble));
        }
        return result;
    }
    float XSpode::score(torch::Tensor& X, torch::Tensor& y)
    {
        torch::Tensor y_pred = predict(X);
        return (y_pred == y).sum().item<float>() / y.size(0);
    }
    float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
    {
        auto y_pred = this->predict(X);
        int correct = 0;
        for (size_t i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == y[i]) {
                correct++;
            }
        }
        return (double)correct / y_pred.size();
    }
} // namespace bayesnet
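// Usage sketch (illustrative only; the surrounding framework supplies the
// discretized tensors and the weight vector):
//   auto clf = bayesnet::XSpode(0);                   // feature 0 as super-parent
//   clf.fit(X, y, weights, bayesnet::Smoothing_t::LAPLACE);
//   torch::Tensor proba = clf.predict_proba(X_test);  // (n_samples x n_classes)
//   torch::Tensor labels = clf.predict(X_test);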