mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-17 16:45:54 +00:00
continue feature selection
This commit is contained in:
@@ -891,7 +891,6 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
|
||||
SelectKBestWeighted(k=1)
|
||||
.fit(self.X_, self.y_, weights)
|
||||
.get_feature_names_out(self.feature_names_in_)
|
||||
.tolist()[0]
|
||||
)
|
||||
# Step 2: Build & train spode with the first feature as sparent
|
||||
estimator = clone(self.estimator_)
|
||||
@@ -914,13 +913,3 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
|
||||
]
|
||||
# Step 4: Add the new model
|
||||
self.estimators_.append(estimator)
|
||||
"""
|
||||
class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
|
||||
feature_edges = [
|
||||
(sparent, f) for f in self.feature_names_in_ if f != sparent
|
||||
]
|
||||
self.weights_ = weights.copy() if weights is not None else None
|
||||
feature_edges.extend(class_edges)
|
||||
self.model_ = BayesianNetwork(feature_edges, show_progress=False)
|
||||
return self.model_
|
||||
"""
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import numpy as np
|
||||
from sklearn.feature_selection import mutual_info_classif
|
||||
|
||||
from sklearn.utils.validation import check_X_y, check_is_fitted
|
||||
|
||||
"""
|
||||
Compute the weighted mutual information between each feature and the
|
||||
@@ -24,10 +24,17 @@ entropy are given.
|
||||
|
||||
|
||||
class SelectKBestWeighted:
|
||||
def __init__(self, k):
|
||||
self.k = k
|
||||
|
||||
def fit(self, X, y, sample_weight):
|
||||
self.X_, self.y_ = check_X_y(X, y)
|
||||
self.X_ = X
|
||||
self.y_ = y
|
||||
self.n_features_in_ = X.shape[1]
|
||||
self.sample_weight_ = sample_weight
|
||||
if self.k > X.shape[1] or self.k<1:
|
||||
raise ValueError(f"k must be between 1 and {self.n_features_in_}")
|
||||
# Compute the entropy of the target variable
|
||||
entropy_y = -np.sum(
|
||||
np.multiply(
|
||||
@@ -42,7 +49,8 @@ class SelectKBestWeighted:
|
||||
# Compute the weighted entropy of each feature
|
||||
entropy_weighted = []
|
||||
for i in range(X.shape[1]):
|
||||
# Compute the weighted frequency of each unique value of the feature
|
||||
# Compute the weighted frequency of each unique value of the
|
||||
# feature
|
||||
freq_weighted = np.bincount(X[:, i], weights=sample_weight)
|
||||
freq_weighted = freq_weighted[freq_weighted != 0]
|
||||
|
||||
@@ -52,8 +60,15 @@ class SelectKBestWeighted:
|
||||
/ np.sum(sample_weight)
|
||||
)
|
||||
|
||||
# Compute the weighted mutual information between each feature and the target
|
||||
# Compute the weighted mutual information between each feature and
|
||||
# the target
|
||||
mi_weighted = mi * entropy_weighted / entropy_y
|
||||
|
||||
# Return the weighted mutual information scores
|
||||
self.mi_weighted_ = mi_weighted
|
||||
return self
|
||||
|
||||
def get_feature_names_out(self, features):
    """Return the names of the ``self.k`` best features, best first.

    Parameters
    ----------
    features : sequence of str
        Candidate feature names, aligned with the columns scored in
        ``fit`` (i.e. ``features[i]`` names column ``i``).

    Returns
    -------
    list
        The ``self.k`` names with the largest weighted mutual
        information, in descending score order.
    """
    check_is_fitted(self, ["X_", "y_", "mi_weighted_"])
    # Rank column indices by descending weighted MI, then keep the top k.
    ranking = np.argsort(self.mi_weighted_)[::-1]
    top_k = ranking[: self.k]
    return [features[idx] for idx in top_k]
|
||||
|
||||
|
Reference in New Issue
Block a user