Fix entropy and information gain

This commit is contained in:
2022-11-28 18:03:46 +01:00
parent e9e2a66203
commit 6c507a24b0
7 changed files with 34 additions and 15 deletions

View File

@@ -1,6 +1,7 @@
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include <numeric> #include <numeric>
#include <iostream> #include <iostream>
#include <stdio.h>
#include "Metrics.h" #include "Metrics.h"
namespace CPPFImdlp namespace CPPFImdlp
{ {
@@ -20,7 +21,8 @@ namespace CPPFImdlp
std::vector<float> cutPts; std::vector<float> cutPts;
std::vector<int> cutIdx; std::vector<int> cutIdx;
float xPrev, cutPoint; float xPrev, cutPoint;
int yPrev, idxPrev; int yPrev;
size_t idxPrev;
std::vector<size_t> indices = sortIndices(X); std::vector<size_t> indices = sortIndices(X);
xPrev = X.at(indices[0]); xPrev = X.at(indices[0]);
yPrev = y.at(indices[0]); yPrev = y.at(indices[0]);
@@ -34,7 +36,7 @@ namespace CPPFImdlp
// Definition 2 Cut points are always on boundaries // Definition 2 Cut points are always on boundaries
if (y.at(*index) != yPrev && xPrev < X.at(*index)) if (y.at(*index) != yPrev && xPrev < X.at(*index))
{ {
cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider; cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
if (debug) if (debug)
{ {
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
@@ -57,6 +59,13 @@ namespace CPPFImdlp
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl; std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl; // << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
} }
std::cout << "+++++++++++++++++++++++" << std::endl;
for (size_t i = 0; i < y.size(); i++)
{
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
return cutPts; return cutPts;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes

View File

@@ -4,7 +4,7 @@ namespace CPPFImdlp
Metrics::Metrics() Metrics::Metrics()
{ {
} }
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, int start, int end) int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, size_t start, size_t end)
{ {
int nClasses = 1; int nClasses = 1;
int yAnt = y.at(start); int yAnt = y.at(start);
@@ -18,7 +18,7 @@ namespace CPPFImdlp
} }
return nClasses; return nClasses;
} }
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int nClasses) float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, int nClasses)
{ {
float entropy = 0; float entropy = 0;
int nElements = 0; int nElements = 0;
@@ -38,7 +38,7 @@ namespace CPPFImdlp
} }
return entropy; return entropy;
} }
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses) float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, size_t cutPoint, int nClasses)
{ {
float iGain = 0.0; float iGain = 0.0;
float entropy, entropyLeft, entropyRight; float entropy, entropyLeft, entropyRight;

View File

@@ -9,9 +9,9 @@ namespace CPPFImdlp
{ {
public: public:
Metrics(); Metrics();
static int numClasses(std::vector<int> &, std::vector<size_t>, int, int); static int numClasses(std::vector<int> &, std::vector<size_t>, size_t, size_t);
static float entropy(std::vector<int> &, std::vector<size_t> &, int, int, int); static float entropy(std::vector<int> &, std::vector<size_t> &, size_t, size_t, int);
static float informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses); static float informationGain(std::vector<int> &, std::vector<size_t> &, size_t, size_t, size_t, int);
}; };
} }
#endif #endif

View File

@@ -95,13 +95,21 @@ class FImdlp(TransformerMixin, BaseEstimator):
print("Cut points for each feature in Iris dataset:") print("Cut points for each feature in Iris dataset:")
yz = self.y_.copy() yz = self.y_.copy()
xz = X[:, 0].copy() xz = X[:, 0].copy()
print("Xz: ", xz) xz = xz[np.argsort(X[:, 0])]
print("Yz: ", yz) yz = yz[np.argsort(X[:, 0])]
print("Solución:") cuts = []
print("Xz*: ", np.sort(X[:, 0])) for i in range(1, len(yz)):
print("yz*: ", yz[np.argsort(X[:, 0])]) if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]:
print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
cuts.append((xz[i] + xz[i - 1]) / 2)
for i in range(0, 1): # self.n_features_): for i in range(0, 1): # self.n_features_):
datax = np.sort(X[:, i]) datax = np.sort(X[:, i])
Xcutpoints = self.discretizer_.cut_points(datax, self.y_) Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
print(f"{self.features_[i]:20s}: {Xcutpoints}") print(f"{self.features_[i]:20s}: {Xcutpoints}")
print("Solución cut_points: ", cuts)
print(xz)
print("***********")
for i in range(0, len(yz)):
print(f"({xz[i]}, {yz[i]})")
print("***********")
return X return X

View File

@@ -7,7 +7,9 @@ X = data.data
y = data.target y = data.target
features = data.feature_names features = data.feature_names
test = FImdlp() test = FImdlp()
# Xcutpoints = test.fit(X, y, features=features).transform(X) Xcutpoints = test.fit(X, y, features=features).transform(X)
clf = CFImdlp(debug=True) clf = CFImdlp(debug=True)
print("Cut points for feature 0 in Iris dataset:") print("Cut points for feature 0 in Iris dataset:")
print(clf.cut_points(X[:, 0], y)) print(clf.cut_points(X[:, 0], y))
print("Xcut")
print(Xcutpoints)

View File

@@ -17,7 +17,7 @@ setup(
], ],
language="c++", language="c++",
include_dirs=["fimdlp"], include_dirs=["fimdlp"],
extra_compile_args=["-std=c++20"], extra_compile_args=["-std=c++2a"],
), ),
] ]
) )