mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Fix entropy and ig
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
#include "CPPFImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <stdio.h>
|
||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
namespace CPPFImdlp
|
namespace CPPFImdlp
|
||||||
{
|
{
|
||||||
@@ -20,7 +21,8 @@ namespace CPPFImdlp
|
|||||||
std::vector<float> cutPts;
|
std::vector<float> cutPts;
|
||||||
std::vector<int> cutIdx;
|
std::vector<int> cutIdx;
|
||||||
float xPrev, cutPoint;
|
float xPrev, cutPoint;
|
||||||
int yPrev, idxPrev;
|
int yPrev;
|
||||||
|
size_t idxPrev;
|
||||||
std::vector<size_t> indices = sortIndices(X);
|
std::vector<size_t> indices = sortIndices(X);
|
||||||
xPrev = X.at(indices[0]);
|
xPrev = X.at(indices[0]);
|
||||||
yPrev = y.at(indices[0]);
|
yPrev = y.at(indices[0]);
|
||||||
@@ -34,7 +36,7 @@ namespace CPPFImdlp
|
|||||||
// Definition 2 Cut points are always on boundaries
|
// Definition 2 Cut points are always on boundaries
|
||||||
if (y.at(*index) != yPrev && xPrev < X.at(*index))
|
if (y.at(*index) != yPrev && xPrev < X.at(*index))
|
||||||
{
|
{
|
||||||
cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider;
|
cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
|
||||||
if (debug)
|
if (debug)
|
||||||
{
|
{
|
||||||
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
||||||
@@ -57,6 +59,13 @@ namespace CPPFImdlp
|
|||||||
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
|
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
|
||||||
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
|
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
|
||||||
}
|
}
|
||||||
|
std::cout << "+++++++++++++++++++++++" << std::endl;
|
||||||
|
for (size_t i = 0; i < y.size(); i++)
|
||||||
|
{
|
||||||
|
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
|
||||||
|
}
|
||||||
|
std::cout << "+++++++++++++++++++++++" << std::endl;
|
||||||
|
|
||||||
return cutPts;
|
return cutPts;
|
||||||
}
|
}
|
||||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
|
@@ -4,7 +4,7 @@ namespace CPPFImdlp
|
|||||||
Metrics::Metrics()
|
Metrics::Metrics()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, int start, int end)
|
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, size_t start, size_t end)
|
||||||
{
|
{
|
||||||
int nClasses = 1;
|
int nClasses = 1;
|
||||||
int yAnt = y.at(start);
|
int yAnt = y.at(start);
|
||||||
@@ -18,7 +18,7 @@ namespace CPPFImdlp
|
|||||||
}
|
}
|
||||||
return nClasses;
|
return nClasses;
|
||||||
}
|
}
|
||||||
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int nClasses)
|
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, int nClasses)
|
||||||
{
|
{
|
||||||
float entropy = 0;
|
float entropy = 0;
|
||||||
int nElements = 0;
|
int nElements = 0;
|
||||||
@@ -38,7 +38,7 @@ namespace CPPFImdlp
|
|||||||
}
|
}
|
||||||
return entropy;
|
return entropy;
|
||||||
}
|
}
|
||||||
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses)
|
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, size_t cutPoint, int nClasses)
|
||||||
{
|
{
|
||||||
float iGain = 0.0;
|
float iGain = 0.0;
|
||||||
float entropy, entropyLeft, entropyRight;
|
float entropy, entropyLeft, entropyRight;
|
||||||
|
@@ -9,9 +9,9 @@ namespace CPPFImdlp
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Metrics();
|
Metrics();
|
||||||
static int numClasses(std::vector<int> &, std::vector<size_t>, int, int);
|
static int numClasses(std::vector<int> &, std::vector<size_t>, size_t, size_t);
|
||||||
static float entropy(std::vector<int> &, std::vector<size_t> &, int, int, int);
|
static float entropy(std::vector<int> &, std::vector<size_t> &, size_t, size_t, int);
|
||||||
static float informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses);
|
static float informationGain(std::vector<int> &, std::vector<size_t> &, size_t, size_t, size_t, int);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
Binary file not shown.
@@ -95,13 +95,21 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
print("Cut points for each feature in Iris dataset:")
|
print("Cut points for each feature in Iris dataset:")
|
||||||
yz = self.y_.copy()
|
yz = self.y_.copy()
|
||||||
xz = X[:, 0].copy()
|
xz = X[:, 0].copy()
|
||||||
print("Xz: ", xz)
|
xz = xz[np.argsort(X[:, 0])]
|
||||||
print("Yz: ", yz)
|
yz = yz[np.argsort(X[:, 0])]
|
||||||
print("Solución:")
|
cuts = []
|
||||||
print("Xz*: ", np.sort(X[:, 0]))
|
for i in range(1, len(yz)):
|
||||||
print("yz*: ", yz[np.argsort(X[:, 0])])
|
if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]:
|
||||||
|
print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
|
||||||
|
cuts.append((xz[i] + xz[i - 1]) / 2)
|
||||||
for i in range(0, 1): # self.n_features_):
|
for i in range(0, 1): # self.n_features_):
|
||||||
datax = np.sort(X[:, i])
|
datax = np.sort(X[:, i])
|
||||||
Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
|
Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
|
||||||
print(f"{self.features_[i]:20s}: {Xcutpoints}")
|
print(f"{self.features_[i]:20s}: {Xcutpoints}")
|
||||||
|
print("Solución cut_points: ", cuts)
|
||||||
|
print(xz)
|
||||||
|
print("***********")
|
||||||
|
for i in range(0, len(yz)):
|
||||||
|
print(f"({xz[i]}, {yz[i]})")
|
||||||
|
print("***********")
|
||||||
return X
|
return X
|
||||||
|
@@ -7,7 +7,9 @@ X = data.data
|
|||||||
y = data.target
|
y = data.target
|
||||||
features = data.feature_names
|
features = data.feature_names
|
||||||
test = FImdlp()
|
test = FImdlp()
|
||||||
# Xcutpoints = test.fit(X, y, features=features).transform(X)
|
Xcutpoints = test.fit(X, y, features=features).transform(X)
|
||||||
clf = CFImdlp(debug=True)
|
clf = CFImdlp(debug=True)
|
||||||
print("Cut points for feature 0 in Iris dataset:")
|
print("Cut points for feature 0 in Iris dataset:")
|
||||||
print(clf.cut_points(X[:, 0], y))
|
print(clf.cut_points(X[:, 0], y))
|
||||||
|
print("Xcut")
|
||||||
|
print(Xcutpoints)
|
||||||
|
Reference in New Issue
Block a user