diff --git a/bayesclass/BayesNetwork.cpp b/bayesclass/BayesNetwork.cpp index 6926d14..d1b711e 100644 --- a/bayesclass/BayesNetwork.cpp +++ b/bayesclass/BayesNetwork.cpp @@ -2879,7 +2879,12 @@ static PyObject *__pyx_pf_10bayesclass_8BayesNet_12BayesNetwork_14addEdge(struct __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_5 = __pyx_convert_string_from_py_std__in_string(__pyx_t_1); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 45, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_self->thisptr->addEdge(__pyx_t_4, __pyx_t_5); + try { + __pyx_v_self->thisptr->addEdge(__pyx_t_4, __pyx_t_5); + } catch(...) { + __Pyx_CppExn2PyErr(); + __PYX_ERR(0, 45, __pyx_L1_error) + } /* "bayesclass/BayesNetwork.pyx":44 * def addNode(self, name, states): diff --git a/bayesclass/BayesNetwork.pyx b/bayesclass/BayesNetwork.pyx index 38db620..0e19cf9 100644 --- a/bayesclass/BayesNetwork.pyx +++ b/bayesclass/BayesNetwork.pyx @@ -11,12 +11,12 @@ cdef extern from "Network.h" namespace "bayesnet": vector[int] predict(vector[vector[int]]&) vector[vector[double]] predict_proba(vector[vector[int]]&) float score(const vector[vector[int]]&, const vector[int]&) - void addNode(string, int); - void addEdge(string, string); - vector[string] getFeatures(); - int getClassNumStates(); - int getStates(); - string getClassName(); + void addNode(string, int) + void addEdge(string, string) except + + vector[string] getFeatures() + int getClassNumStates() + int getStates() + string getClassName() string version() cdef class BayesNetwork: diff --git a/bayesclass/clfs.py b/bayesclass/clfs.py index 3521729..b154e95 100644 --- a/bayesclass/clfs.py +++ b/bayesclass/clfs.py @@ -368,7 +368,7 @@ class KDB(BayesBase): max_minfo = np.argmax(cond_w[idx, :]) if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta: try: - self.add_edge( + self.model_.addEdge( self.feature_names_in_[max_minfo], self.feature_names_in_[idx], ) @@ -399,7 +399,6 @@ class KDB(BayesBase): Compute the conditional probabilility infered by the structure of BN by using counts from DB, and output BN. """ - super()._build(kwargs) # 1. get the mutual information between each feature and the class mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True) # 2. symmetric matrix where each element represents I(X, Y| class_node) @@ -410,53 +409,31 @@ class KDB(BayesBase): self.class_name_, self.n_classes_, ) - conditional_weights = metrics.conditionalEdgeWeights(self.n_features_in_ + 1) - ''' - # Step 1: Compute edge weights for a fully connected graph. - n_vars = len(data.columns) - pbar = combinations(data.columns, 2) - if show_progress and SHOW_PROGRESS: - pbar = tqdm(pbar, total=(n_vars * (n_vars - 1) / 2), desc="Building tree") - - def _conditional_edge_weights_fn(u, v): - """ - Computes the conditional edge weight of variable index u and v conditioned on class_node - """ - cond_marginal = data.loc[:, class_node].value_counts() / data.shape[0] - cond_edge_weight = 0 - for index, marg_prob in cond_marginal.items(): - df_cond_subset = data[data.loc[:, class_node] == index] - cond_edge_weight += marg_prob * edge_weights_fn( - df_cond_subset.loc[:, u], df_cond_subset.loc[:, v] - ) - return cond_edge_weight - - vals = Parallel(n_jobs=1, prefer="threads")( - delayed(_conditional_edge_weights_fn)(u, v) for u, v in pbar + conditional_weights = metrics.conditionalEdgeWeights( + self.n_features_in_ + 1 ) - weights = np.zeros((n_vars, n_vars)) - indices = np.triu_indices(n_vars, k=1) - weights[indices] = vals - weights.T[indices] = vals - - return weights - ''' - # 3. Let the used variable list, S, be empty. S_nodes = [] + num_states = { + feature: len(states) + for feature, states in kwargs["state_names"].items() + } # 4. Let the DAG being constructed, BN, begin with a single class node + self.model_ = BayesNetwork() + self.model_.addNode(self.class_name_, self.n_classes_) # 5. Repeat until S includes all domain features # 5.1 Select feature Xmax which is not in S and has the largest value for idx in np.argsort(mutual): # 5.2 Add a node to BN representing Xmax. feature = self.feature_names_in_[idx] + self.model_.addNode(feature, num_states[feature]) # 5.3 Add an arc from C to Xmax in BN. - self.edges_.append(self.class_name_, feature) + self.model_.addEdge(self.class_name_, feature) # 5.4 Add m = min(lSl,/c) arcs from m distinct features Xj in S self._add_m_edges(idx, S_nodes, conditional_weights) # 5.5 Add Xmax to S. S_nodes.append(idx) - self.dag_ = dag + self.edges_ = [] def build_spodes(features, class_name):