Complete refactor of KDB with BayesNet library

This commit is contained in:
2023-07-12 12:07:01 +02:00
parent 2ff38f73e7
commit aef22306ef
3 changed files with 24 additions and 42 deletions

View File

@@ -2879,7 +2879,12 @@ static PyObject *__pyx_pf_10bayesclass_8BayesNet_12BayesNetwork_14addEdge(struct
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_t_5 = __pyx_convert_string_from_py_std__in_string(__pyx_t_1); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 45, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_v_self->thisptr->addEdge(__pyx_t_4, __pyx_t_5);
try {
__pyx_v_self->thisptr->addEdge(__pyx_t_4, __pyx_t_5);
} catch(...) {
__Pyx_CppExn2PyErr();
__PYX_ERR(0, 45, __pyx_L1_error)
}
/* "bayesclass/BayesNetwork.pyx":44
* def addNode(self, name, states):

View File

@@ -11,12 +11,12 @@ cdef extern from "Network.h" namespace "bayesnet":
vector[int] predict(vector[vector[int]]&)
vector[vector[double]] predict_proba(vector[vector[int]]&)
float score(const vector[vector[int]]&, const vector[int]&)
void addNode(string, int);
void addEdge(string, string);
vector[string] getFeatures();
int getClassNumStates();
int getStates();
string getClassName();
void addNode(string, int)
void addEdge(string, string) except +
vector[string] getFeatures()
int getClassNumStates()
int getStates()
string getClassName()
string version()
cdef class BayesNetwork:

View File

@@ -368,7 +368,7 @@ class KDB(BayesBase):
max_minfo = np.argmax(cond_w[idx, :])
if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
try:
self.add_edge(
self.model_.addEdge(
self.feature_names_in_[max_minfo],
self.feature_names_in_[idx],
)
@@ -399,7 +399,6 @@ class KDB(BayesBase):
Compute the conditional probability inferred by the structure of BN by
using counts from DB, and output BN.
"""
super()._build(kwargs)
# 1. get the mutual information between each feature and the class
mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
# 2. symmetric matrix where each element represents I(X, Y| class_node)
@@ -410,53 +409,31 @@ class KDB(BayesBase):
self.class_name_,
self.n_classes_,
)
conditional_weights = metrics.conditionalEdgeWeights(self.n_features_in_ + 1)
'''
# Step 1: Compute edge weights for a fully connected graph.
n_vars = len(data.columns)
pbar = combinations(data.columns, 2)
if show_progress and SHOW_PROGRESS:
pbar = tqdm(pbar, total=(n_vars * (n_vars - 1) / 2), desc="Building tree")
def _conditional_edge_weights_fn(u, v):
"""
Computes the conditional edge weight of variable index u and v conditioned on class_node
"""
cond_marginal = data.loc[:, class_node].value_counts() / data.shape[0]
cond_edge_weight = 0
for index, marg_prob in cond_marginal.items():
df_cond_subset = data[data.loc[:, class_node] == index]
cond_edge_weight += marg_prob * edge_weights_fn(
df_cond_subset.loc[:, u], df_cond_subset.loc[:, v]
)
return cond_edge_weight
vals = Parallel(n_jobs=1, prefer="threads")(
delayed(_conditional_edge_weights_fn)(u, v) for u, v in pbar
conditional_weights = metrics.conditionalEdgeWeights(
self.n_features_in_ + 1
)
weights = np.zeros((n_vars, n_vars))
indices = np.triu_indices(n_vars, k=1)
weights[indices] = vals
weights.T[indices] = vals
return weights
'''
# 3. Let the used variable list, S, be empty.
S_nodes = []
num_states = {
feature: len(states)
for feature, states in kwargs["state_names"].items()
}
# 4. Let the DAG being constructed, BN, begin with a single class node
self.model_ = BayesNetwork()
self.model_.addNode(self.class_name_, self.n_classes_)
# 5. Repeat until S includes all domain features
# 5.1 Select feature Xmax which is not in S and has the largest value
for idx in np.argsort(mutual):
# 5.2 Add a node to BN representing Xmax.
feature = self.feature_names_in_[idx]
self.model_.addNode(feature, num_states[feature])
# 5.3 Add an arc from C to Xmax in BN.
self.edges_.append(self.class_name_, feature)
self.model_.addEdge(self.class_name_, feature)
# 5.4 Add m = min(|S|, k) arcs from m distinct features Xj in S
self._add_m_edges(idx, S_nodes, conditional_weights)
# 5.5 Add Xmax to S.
S_nodes.append(idx)
self.dag_ = dag
self.edges_ = []
def build_spodes(features, class_name):