mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-16 16:15:57 +00:00
Complete feature_selection with weighted entropy
This commit is contained in:
@@ -891,6 +891,7 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
|
|||||||
SelectKBestWeighted(k=1)
|
SelectKBestWeighted(k=1)
|
||||||
.fit(self.X_, self.y_, weights)
|
.fit(self.X_, self.y_, weights)
|
||||||
.get_feature_names_out(self.feature_names_in_)
|
.get_feature_names_out(self.feature_names_in_)
|
||||||
|
.tolist()
|
||||||
)
|
)
|
||||||
# Step 2: Build & train spode with the first feature as sparent
|
# Step 2: Build & train spode with the first feature as sparent
|
||||||
estimator = clone(self.estimator_)
|
estimator = clone(self.estimator_)
|
||||||
@@ -898,6 +899,7 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
|
|||||||
_args["sparent"] = feature
|
_args["sparent"] = feature
|
||||||
_args["sample_weight"] = weights
|
_args["sample_weight"] = weights
|
||||||
_args["weighted"] = True
|
_args["weighted"] = True
|
||||||
|
print("I'm gonna build a spode with", feature)
|
||||||
# Step 2.1: build dataset
|
# Step 2.1: build dataset
|
||||||
# Step 2.2: Train the model
|
# Step 2.2: Train the model
|
||||||
estimator.fit(self.X_, self.y_, **_args)
|
estimator.fit(self.X_, self.y_, **_args)
|
||||||
|
@@ -1,6 +1,10 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.feature_selection import mutual_info_classif
|
from sklearn.feature_selection import mutual_info_classif
|
||||||
from sklearn.utils.validation import check_X_y, check_is_fitted
|
from sklearn.utils.validation import check_X_y, check_is_fitted
|
||||||
|
from sklearn.feature_selection._univariate_selection import (
|
||||||
|
_BaseFilter,
|
||||||
|
_clean_nans,
|
||||||
|
)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Compute the weighted mutual information between each feature and the
|
Compute the weighted mutual information between each feature and the
|
||||||
@@ -23,18 +27,38 @@ entropy are given.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class SelectKBestWeighted:
|
class SelectKBestWeighted(_BaseFilter):
|
||||||
def __init__(self, k):
|
def __init__(self, *, k=10):
|
||||||
|
super().__init__(score_func=mutual_info_classif)
|
||||||
self.k = k
|
self.k = k
|
||||||
|
|
||||||
|
def _check_params(self, X, y):
|
||||||
|
if self.k > X.shape[1] or self.k < 1:
|
||||||
|
raise ValueError(
|
||||||
|
f"k must be between 1 and {X.shape[1]} got {self.k}."
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get_support_mask(self):
|
||||||
|
check_is_fitted(self)
|
||||||
|
|
||||||
|
if self.k == "all":
|
||||||
|
return np.ones(self.scores_.shape, dtype=bool)
|
||||||
|
elif self.k == 0:
|
||||||
|
return np.zeros(self.scores_.shape, dtype=bool)
|
||||||
|
else:
|
||||||
|
scores = _clean_nans(self.scores_)
|
||||||
|
mask = np.zeros(scores.shape, dtype=bool)
|
||||||
|
|
||||||
|
# Request a stable sort. Mergesort takes more memory (~40MB per
|
||||||
|
# megafeature on x86-64).
|
||||||
|
mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
|
||||||
|
return mask
|
||||||
|
|
||||||
def fit(self, X, y, sample_weight):
|
def fit(self, X, y, sample_weight):
|
||||||
self.X_, self.y_ = check_X_y(X, y)
|
self.X_, self.y_ = check_X_y(X, y)
|
||||||
self.X_ = X
|
self._check_params(X, y)
|
||||||
self.y_ = y
|
|
||||||
self.n_features_in_ = X.shape[1]
|
self.n_features_in_ = X.shape[1]
|
||||||
self.sample_weight_ = sample_weight
|
self.sample_weight_ = sample_weight
|
||||||
if self.k > X.shape[1] or self.k<1:
|
|
||||||
raise ValueError(f"k must be between 1 and {self.n_features_in_}")
|
|
||||||
# Compute the entropy of the target variable
|
# Compute the entropy of the target variable
|
||||||
entropy_y = -np.sum(
|
entropy_y = -np.sum(
|
||||||
np.multiply(
|
np.multiply(
|
||||||
@@ -44,7 +68,7 @@ class SelectKBestWeighted:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Compute the mutual information between each feature and the target
|
# Compute the mutual information between each feature and the target
|
||||||
mi = mutual_info_classif(X, y)
|
mi = self.score_func(X, y)
|
||||||
|
|
||||||
# Compute the weighted entropy of each feature
|
# Compute the weighted entropy of each feature
|
||||||
entropy_weighted = []
|
entropy_weighted = []
|
||||||
@@ -65,10 +89,5 @@ class SelectKBestWeighted:
|
|||||||
mi_weighted = mi * entropy_weighted / entropy_y
|
mi_weighted = mi * entropy_weighted / entropy_y
|
||||||
|
|
||||||
# Return the weighted mutual information scores
|
# Return the weighted mutual information scores
|
||||||
self.mi_weighted_ = mi_weighted
|
self.scores_ = mi_weighted
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def get_feature_names_out(self, features):
|
|
||||||
check_is_fitted(self, ["X_", "y_", "mi_weighted_"])
|
|
||||||
return [features[i] for i in np.argsort(self.mi_weighted_)[::-1][:self.k]]
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user