Add Entropy method

This commit is contained in:
2022-11-28 10:15:03 +01:00
parent a54c774f95
commit 3d27c4c3b7
6 changed files with 74 additions and 16 deletions

View File

@@ -1,6 +1,7 @@
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include <numeric> #include <numeric>
#include <iostream> #include <iostream>
#include "Metrics.h"
namespace CPPFImdlp namespace CPPFImdlp
{ {
CPPFImdlp::CPPFImdlp() : debug(false), precision(6) CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
@@ -17,33 +18,35 @@ namespace CPPFImdlp
std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y) std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
{ {
std::vector<float> cutPts; std::vector<float> cutPts;
float antx, cutPoint; float xPrev, cutPoint;
int anty; int yPrev;
std::vector<size_t> indices = sortIndices(X); std::vector<size_t> indices = sortIndices(X);
antx = X.at(indices[0]); xPrev = X.at(indices[0]);
anty = y.at(indices[0]); yPrev = y.at(indices[0]);
for (auto index = indices.begin(); index != indices.end(); ++index)
{
// std::cout << X.at(*index) << " -> " << y.at(*index) << " // ";
// Definition 2 Cut points are always on boundaries
if (y.at(*index) != anty && antx < X.at(*index))
// Weka implementation
// if (antx < X.at(*index))
{
cutPoint = round((X.at(*index) + antx) / 2 * divider) / divider;
if (debug) if (debug)
{ {
std::cout << "Cut point: " << (antx + X.at(*index)) / 2 << " //"; std::cout << "Entropy: " << Metrics::entropy(y, 0, y.size(), Metrics::numClasses(y)) << std::endl;
std::cout << X.at(*index) << " -> " << y.at(*index) << " anty= " << anty; }
std::cout << "* (" << X.at(*index) << ", " << antx << ")=" << ((X.at(*index) + antx) / 2) << std::endl; for (auto index = indices.begin(); index != indices.end(); ++index)
{
// Definition 2 Cut points are always on boundaries
if (y.at(*index) != yPrev && xPrev < X.at(*index))
{
cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider;
if (debug)
{
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" << ((X.at(*index) + xPrev) / 2) << std::endl;
} }
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
} }
antx = X.at(*index); xPrev = X.at(*index);
anty = y.at(*index); yPrev = y.at(*index);
} }
return cutPts; return cutPts;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X) std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
{ {
std::vector<size_t> idx(X.size()); std::vector<size_t> idx(X.size());

40
fimdlp/Metrics.cpp Normal file
View File

@@ -0,0 +1,40 @@
#include "Metrics.h"
#include <cmath>
#include <map>
#include <set>
namespace CPPFImdlp
{
    // Metrics only offers static helpers, so construction has nothing to set up.
    Metrics::Metrics()
    {
    }

    /**
     * Shannon entropy, in bits, of the labels stored in y[start, end).
     *
     * @param y        class label of every sample
     * @param start    first position of the slice (inclusive)
     * @param end      one past the last position of the slice (exclusive)
     * @param nClasses size of the dense per-class tally; labels in
     *                 [0, nClasses) are counted there
     * @return the entropy of the slice, or 0 when the slice is empty
     *
     * The slice is read with unchecked indexing, so the caller must keep
     * 0 <= start <= end <= y.size().
     */
    float Metrics::entropy(std::vector<int> &y, int start, int end, int nClasses)
    {
        const int nElements = end - start;
        if (nElements <= 0)
        {
            return 0;
        }
        std::vector<int> counts(nClasses > 0 ? nClasses : 0, 0);
        // Labels that do not fit the dense tally (negative, or >= nClasses
        // because the labels are not the contiguous range 0..nClasses-1) are
        // counted here instead of writing outside the bounds of `counts`.
        std::map<int, int> outOfRange;
        for (int i = start; i < end; ++i)
        {
            const int label = y[i];
            if (label >= 0 && label < nClasses)
            {
                ++counts[label];
            }
            else
            {
                ++outOfRange[label];
            }
        }
        float entropy = 0;
        // Classes that never occur are skipped: their term is 0 and log2(0)
        // is not finite.
        const auto addTerm = [&entropy, nElements](int count)
        {
            if (count > 0)
            {
                const float p = static_cast<float>(count) / nElements;
                entropy -= p * std::log2(p);
            }
        };
        for (const int count : counts)
        {
            addTerm(count);
        }
        for (const auto &entry : outOfRange)
        {
            addTerm(entry.second);
        }
        return entropy;
    }

    /**
     * Number of distinct class labels present in y.
     *
     * @param y class label of every sample
     * @return how many different values appear in y; 0 for an empty vector
     */
    int Metrics::numClasses(std::vector<int> &y)
    {
        // A set keeps one copy of each label, so its size is the class count
        // no matter how the labels are ordered or how often each one repeats.
        const std::set<int> labels(y.begin(), y.end());
        return static_cast<int>(labels.size());
    }
}

16
fimdlp/Metrics.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef METRICS_H
#define METRICS_H
#include <vector>
#include <Python.h>
#include <utility>
namespace CPPFImdlp
{
    /// Static helper routines that compute statistics over class-label vectors.
    class Metrics
    {
    public:
        Metrics();

        /// Entropy (log base 2) of the labels in y[start, end).
        /// @param y        class label of every sample
        /// @param start    first position of the slice (inclusive)
        /// @param end      one past the last position of the slice (exclusive)
        /// @param nClasses number of per-class counters to allocate
        static float entropy(std::vector<int> &y, int start, int end, int nClasses);

        /// Class count computed from the labels in y; CPPFImdlp::cutPoints
        /// passes the result as the nClasses argument of entropy().
        static int numClasses(std::vector<int> &y);
    };
}
#endif

View File

@@ -95,10 +95,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
print("Cut points for each feature in Iris dataset:") print("Cut points for each feature in Iris dataset:")
yz = self.y_.copy() yz = self.y_.copy()
xz = X[:, 0].copy() xz = X[:, 0].copy()
xzz = self.discretizer_.sort_vectors(xz, yz)
print("Xz: ", xz) print("Xz: ", xz)
print("Yz: ", yz) print("Yz: ", yz)
print("Xzz: ", xzz)
print("Solución:") print("Solución:")
print("Xz*: ", np.sort(X[:, 0])) print("Xz*: ", np.sort(X[:, 0]))
print("yz*: ", yz[np.argsort(X[:, 0])]) print("yz*: ", yz[np.argsort(X[:, 0])])

View File

@@ -13,6 +13,7 @@ setup(
sources=[ sources=[
"fimdlp/cfimdlp.pyx", "fimdlp/cfimdlp.pyx",
"fimdlp/CPPFImdlp.cpp", "fimdlp/CPPFImdlp.cpp",
"fimdlp/Metrics.cpp",
], ],
language="c++", language="c++",
include_dirs=["fimdlp"], include_dirs=["fimdlp"],