Fix entropy and information gain

This commit is contained in:
2022-11-28 18:03:46 +01:00
parent e9e2a66203
commit 6c507a24b0
7 changed files with 34 additions and 15 deletions

View File

@@ -1,6 +1,7 @@
#include "CPPFImdlp.h"
#include <numeric>
#include <iostream>
#include <stdio.h>
#include "Metrics.h"
namespace CPPFImdlp
{
@@ -20,7 +21,8 @@ namespace CPPFImdlp
std::vector<float> cutPts;
std::vector<int> cutIdx;
float xPrev, cutPoint;
int yPrev, idxPrev;
int yPrev;
size_t idxPrev;
std::vector<size_t> indices = sortIndices(X);
xPrev = X.at(indices[0]);
yPrev = y.at(indices[0]);
@@ -34,7 +36,7 @@ namespace CPPFImdlp
// Definition 2 Cut points are always on boundaries
if (y.at(*index) != yPrev && xPrev < X.at(*index))
{
cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider;
cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
if (debug)
{
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
@@ -57,6 +59,13 @@ namespace CPPFImdlp
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
}
std::cout << "+++++++++++++++++++++++" << std::endl;
for (size_t i = 0; i < y.size(); i++)
{
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
return cutPts;
}
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes

View File

@@ -4,7 +4,7 @@ namespace CPPFImdlp
Metrics::Metrics()
{
}
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, int start, int end)
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, size_t start, size_t end)
{
int nClasses = 1;
int yAnt = y.at(start);
@@ -18,7 +18,7 @@ namespace CPPFImdlp
}
return nClasses;
}
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int nClasses)
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, int nClasses)
{
float entropy = 0;
int nElements = 0;
@@ -38,7 +38,7 @@ namespace CPPFImdlp
}
return entropy;
}
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses)
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, size_t cutPoint, int nClasses)
{
float iGain = 0.0;
float entropy, entropyLeft, entropyRight;

View File

@@ -9,9 +9,9 @@ namespace CPPFImdlp
{
public:
Metrics();
static int numClasses(std::vector<int> &, std::vector<size_t>, int, int);
static float entropy(std::vector<int> &, std::vector<size_t> &, int, int, int);
static float informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses);
static int numClasses(std::vector<int> &, std::vector<size_t>, size_t, size_t);
static float entropy(std::vector<int> &, std::vector<size_t> &, size_t, size_t, int);
static float informationGain(std::vector<int> &, std::vector<size_t> &, size_t, size_t, size_t, int);
};
}
#endif

View File

@@ -95,13 +95,21 @@ class FImdlp(TransformerMixin, BaseEstimator):
print("Cut points for each feature in Iris dataset:")
yz = self.y_.copy()
xz = X[:, 0].copy()
print("Xz: ", xz)
print("Yz: ", yz)
print("Solución:")
print("Xz*: ", np.sort(X[:, 0]))
print("yz*: ", yz[np.argsort(X[:, 0])])
xz = xz[np.argsort(X[:, 0])]
yz = yz[np.argsort(X[:, 0])]
cuts = []
for i in range(1, len(yz)):
if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]:
print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
cuts.append((xz[i] + xz[i - 1]) / 2)
for i in range(0, 1): # self.n_features_):
datax = np.sort(X[:, i])
Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
print(f"{self.features_[i]:20s}: {Xcutpoints}")
print("Solución cut_points: ", cuts)
print(xz)
print("***********")
for i in range(0, len(yz)):
print(f"({xz[i]}, {yz[i]})")
print("***********")
return X

View File

@@ -7,7 +7,9 @@ X = data.data
y = data.target
features = data.feature_names
test = FImdlp()
# Xcutpoints = test.fit(X, y, features=features).transform(X)
Xcutpoints = test.fit(X, y, features=features).transform(X)
clf = CFImdlp(debug=True)
print("Cut points for feature 0 in Iris dataset:")
print(clf.cut_points(X[:, 0], y))
print("Xcut")
print(Xcutpoints)

View File

@@ -17,7 +17,7 @@ setup(
],
language="c++",
include_dirs=["fimdlp"],
extra_compile_args=["-std=c++20"],
extra_compile_args=["-std=c++2a"],
),
]
)