Mirror of https://github.com/Doctorado-ML/mufs.git (synced 2025-08-18 00:55:53 +00:00)
Fix mistake in between features merit
k.py (26 changed lines)
@@ -21,13 +21,15 @@ mfsd = MFS(discrete=True)
 # X = data[:, -1:]
 # y = data[:, -1]
 
-data, meta = arff.loadarff(
-    "/Users/rmontanana/Code/stree_datasets/data/tanveer/balance-scale/balance-scale.arff"
+filename = (
+    "/Users/rmontanana/Code/stree_datasets/data/tanveer/conn-bench-sonar-min"
+    "es-rocks/conn-bench-sonar-mines-rocks.arff"
 )
-train = np.array([data["f1"], data["f2"], data["f3"], data["f4"]])
-y = data["clase"]
+data, meta = arff.loadarff(filename)
+train = np.array([data[i] for i in meta])
 X = train.T
+X = X[:, :-1].astype("float64")
+y = data["clase"]
 
 
 for c in range(X.shape[1]):
@@ -68,10 +70,10 @@ clf = Stree(random_state=0)
 subf = fcfb_f
 print("fcfb", clf.fit(X[:, subf], y).score(X[:, subf], y))
 
-for c in range(X.shape[1]):
-    for k in range(X.shape[1]):
-        ac = 0
-        for v in range(X[:, c].shape[0]):
-            if X[v, c] == X[v, k]:
-                ac += 1
-        print(f"{c} {k} {ac}")
+# for c in range(X.shape[1]):
+#     for k in range(X.shape[1]):
+#         ac = 0
+#         for v in range(X[:, c].shape[0]):
+#             if X[v, c] == X[v, k]:
+#                 ac += 1
+#         print(f"{c} {k} {ac}")
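The k.py change above swaps a hard-coded balance-scale load for a filename variable and a generic per-attribute loop. A minimal sketch of the loading pattern it adopts, with the path left to the caller and the "clase" column name taken from the script itself:

# A minimal sketch of the loading pattern the new k.py adopts; the path is a
# caller-supplied ARFF file, not one shipped with the repository.
import numpy as np
from scipy.io import arff


def load_arff_dataset(path):
    """Return (X, y): numeric attributes in X, the 'clase' column as y."""
    data, meta = arff.loadarff(path)
    train = np.array([data[name] for name in meta])  # one row per attribute
    X = train.T[:, :-1].astype("float64")            # drop the class column, cast to float
    y = data["clase"]
    return X, y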
mfs/Metrics.py (111 changed lines)
@@ -27,6 +27,90 @@ class Metrics:
         """
         return Metrics._compute_mi_cd(x, y, n_neighbors=3)
 
+    @staticmethod
+    def information_gain_cont_features(xa, xb):
+        """Measures the reduction in uncertainty about the value of xb when the
+        value of the continuous variable xa is known (also called mutual information)
+        (https://www.sciencedirect.com/science/article/pii/S0020025519303603)
+
+        Parameters
+        ----------
+        xa : np.array
+            values of the continuous variable
+        xb : np.array
+            values of the continuous variable
+
+        Returns
+        -------
+        float
+            Information gained
+        """
+        return Metrics._compute_mi_cc(xa, xb, n_neighbors=3)
+
+    @staticmethod
+    def _compute_mi_cc(x, y, n_neighbors):
+        """Compute mutual information between two continuous variables.
+
+        Parameters
+        ----------
+        x, y : ndarray, shape (n_samples,)
+            Samples of two continuous random variables, must have an identical
+            shape.
+
+        n_neighbors : int
+            Number of nearest neighbors to search for each point, see [1]_.
+
+        Returns
+        -------
+        mi : float
+            Estimated mutual information. If it turned out to be negative, it is
+            replaced by 0.
+
+        Notes
+        -----
+        True mutual information can't be negative. If its estimate by a numerical
+        method is negative, it means (provided the method is adequate) that the
+        mutual information is close to 0, and replacing it by 0 is a reasonable
+        strategy.
+
+        References
+        ----------
+        .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
+           information". Phys. Rev. E 69, 2004.
+        """
+
+        n_samples = x.size
+
+        x = x.reshape((-1, 1))
+        y = y.reshape((-1, 1))
+        xy = np.hstack((x, y))
+
+        # Here we rely on NearestNeighbors to select the fastest algorithm.
+        nn = NearestNeighbors(metric="chebyshev", n_neighbors=n_neighbors)
+
+        nn.fit(xy)
+        radius = nn.kneighbors()[0]
+        radius = np.nextafter(radius[:, -1], 0)
+
+        # KDTree is explicitly fit to allow for the querying of number of
+        # neighbors within a specified radius
+        kd = KDTree(x, metric="chebyshev")
+        nx = kd.query_radius(x, radius, count_only=True, return_distance=False)
+        nx = np.array(nx) - 1.0
+
+        kd = KDTree(y, metric="chebyshev")
+        ny = kd.query_radius(y, radius, count_only=True, return_distance=False)
+        ny = np.array(ny) - 1.0
+
+        mi = (
+            digamma(n_samples)
+            + digamma(n_neighbors)
+            - np.mean(digamma(nx + 1))
+            - np.mean(digamma(ny + 1))
+        )
+
+        return max(0, mi)
+
     @staticmethod
     def _compute_mi_cd(c, d, n_neighbors):
         """Compute mutual information between continuous and discrete
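The _compute_mi_cc body added above is the Kraskov k-nearest-neighbour (KSG) estimator, the same scheme scikit-learn uses for pairs of continuous variables, so its output can be sanity-checked against mutual_info_regression on synthetic data. This is a rough cross-check sketch, not a test shipped with the repository:

# Rough cross-check of the continuous-continuous estimator added above:
# scikit-learn's mutual_info_regression implements the same Kraskov k-NN idea,
# so the two estimates should land in the same ballpark on synthetic data.
import numpy as np
from sklearn.feature_selection import mutual_info_regression

rng = np.random.default_rng(0)
xa = rng.normal(size=500)
xb = xa + 0.5 * rng.normal(size=500)  # correlated, so MI is clearly positive

mi = mutual_info_regression(
    xa.reshape(-1, 1), xb, n_neighbors=3, random_state=0
)[0]
print(f"KSG estimate via scikit-learn: {mi:.3f}")
# Metrics.information_gain_cont_features(xa, xb) should be of similar magnitude.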
@@ -189,6 +273,33 @@ class Metrics:
             )
         )
 
+    @staticmethod
+    def symmetrical_unc_continuous_features(x, y):
+        """Compute symmetrical uncertainty. Using Greg Ver Steeg's npeet
+        https://github.com/gregversteeg/NPEET
+
+        Parameters
+        ----------
+        x : np.array
+            values of the continuous variable
+        y : np.array
+            array of labels
+
+        Returns
+        -------
+        float
+            symmetrical uncertainty
+        """
+
+        return (
+            2.0
+            * Metrics.information_gain_cont_features(x, y)
+            / (
+                Metrics.differential_entropy(x, k=len(x) - 1)
+                + Metrics.entropy(y)
+            )
+        )
+
     @staticmethod
     def symmetrical_uncertainty(x, y):
         """Compute symmetrical uncertainty. Normalize* information gain (mutual
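The new symmetrical_unc_continuous_features normalises the continuous mutual information by the sum of a differential entropy term for x and the entropy of y. A small usage sketch, assuming the package is importable as mfs.Metrics, which is the layout implied by the file header above:

# Usage sketch for the new continuous feature-feature SU; the import path is an
# assumption based on the mfs/Metrics.py location shown in this diff.
import numpy as np
from mfs.Metrics import Metrics

rng = np.random.default_rng(1)
fa = rng.normal(size=300)
fb = 2.0 * fa + rng.normal(size=300)  # strongly related features

su = Metrics.symmetrical_unc_continuous_features(fa, fb)
print(f"SU(fa, fb) = {su:.3f}")  # larger values indicate more redundant features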
@@ -33,6 +33,11 @@ class MFS:
             if discrete
             else Metrics.symmetrical_unc_continuous
         )
+        self.symmetrical_uncertainty_features = (
+            Metrics.symmetrical_uncertainty
+            if discrete
+            else Metrics.symmetrical_unc_continuous_features
+        )
         self._fitted = False
 
     def _initialize(self, X, y):
@@ -93,7 +98,7 @@ class MFS:
         if (feature_a, feature_b) not in self._su_features:
             self._su_features[
                 (feature_a, feature_b)
-            ] = self.symmetrical_uncertainty(
+            ] = self.symmetrical_uncertainty_features(
                 self.X_[:, feature_a], self.X_[:, feature_b]
             )
         return self._su_features[(feature_a, feature_b)]
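The @@ -93 hunk carries the fix named in the commit title: the cached between-features value now goes through symmetrical_uncertainty_features, so continuous feature pairs use the continuous-continuous estimator rather than the feature-versus-class one. A standalone sketch of that memoised pair lookup, with su_fn standing in for whichever estimator the discrete flag selected (illustrative names, not the repository's exact code):

# Standalone sketch of the cached between-features SU lookup shown above;
# su_fn stands in for the estimator chosen from the discrete flag.
def make_pairwise_su(X, su_fn):
    cache = {}

    def pairwise_su(feature_a, feature_b):
        if (feature_a, feature_b) not in cache:
            cache[(feature_a, feature_b)] = su_fn(X[:, feature_a], X[:, feature_b])
        return cache[(feature_a, feature_b)]

    return pairwise_su

# e.g. pair_su = make_pairwise_su(X, Metrics.symmetrical_unc_continuous_features)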
@@ -148,7 +153,7 @@ class MFS:
         candidates.append(first_candidate)
         self._scores.append(s_list[first_candidate])
         while continue_condition:
-            merit = float_info.min
+            merit = -float_info.min
             id_selected = None
             for idx, feature in enumerate(feature_order):
                 candidates.append(feature)
@@ -157,9 +162,6 @@ class MFS:
                     id_selected = idx
                     merit = merit_new
                 candidates.pop()
-            if id_selected is None:
-                # Every merit computed is 0
-                break
             candidates.append(feature_order[id_selected])
             self._scores.append(merit)
             del feature_order[id_selected]
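The last two hunks adjust the greedy search itself: the running best merit now starts just below zero (-float_info.min) rather than at the smallest positive float, and the early exit for the no-selection case is removed, so a candidate whose merit is exactly zero can still be picked. A self-contained sketch of that loop shape, with merit_fn standing in for the repository's merit computation (an assumption, not its actual code):

# Sketch of the greedy forward selection the last two hunks touch; merit_fn is
# assumed to return non-negative scores (SU-based merits are), so at least one
# candidate always beats the -float_info.min starting value.
from sys import float_info


def greedy_select(n_features, merit_fn, max_features):
    feature_order = list(range(n_features))
    candidates, scores = [], []
    while feature_order and len(candidates) < max_features:
        merit, id_selected = -float_info.min, None
        for idx, feature in enumerate(feature_order):
            candidates.append(feature)          # try adding this feature
            merit_new = merit_fn(candidates)
            if merit_new > merit:
                id_selected, merit = idx, merit_new
            candidates.pop()                    # undo the trial
        candidates.append(feature_order[id_selected])
        scores.append(merit)
        del feature_order[id_selected]
    return candidates, scores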