mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 08:25:51 +00:00
Add Entropy method
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
#include "CPPFImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include "Metrics.h"
|
||||||
namespace CPPFImdlp
|
namespace CPPFImdlp
|
||||||
{
|
{
|
||||||
CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
|
CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
|
||||||
@@ -17,33 +18,35 @@ namespace CPPFImdlp
|
|||||||
std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
|
std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
|
||||||
{
|
{
|
||||||
std::vector<float> cutPts;
|
std::vector<float> cutPts;
|
||||||
float antx, cutPoint;
|
float xPrev, cutPoint;
|
||||||
int anty;
|
int yPrev;
|
||||||
std::vector<size_t> indices = sortIndices(X);
|
std::vector<size_t> indices = sortIndices(X);
|
||||||
antx = X.at(indices[0]);
|
xPrev = X.at(indices[0]);
|
||||||
anty = y.at(indices[0]);
|
yPrev = y.at(indices[0]);
|
||||||
for (auto index = indices.begin(); index != indices.end(); ++index)
|
|
||||||
{
|
|
||||||
// std::cout << X.at(*index) << " -> " << y.at(*index) << " // ";
|
|
||||||
// Definition 2 Cut points are always on boundaries
|
|
||||||
if (y.at(*index) != anty && antx < X.at(*index))
|
|
||||||
// Weka implementation
|
|
||||||
// if (antx < X.at(*index))
|
|
||||||
{
|
|
||||||
cutPoint = round((X.at(*index) + antx) / 2 * divider) / divider;
|
|
||||||
if (debug)
|
if (debug)
|
||||||
{
|
{
|
||||||
std::cout << "Cut point: " << (antx + X.at(*index)) / 2 << " //";
|
std::cout << "Entropy: " << Metrics::entropy(y, 0, y.size(), Metrics::numClasses(y)) << std::endl;
|
||||||
std::cout << X.at(*index) << " -> " << y.at(*index) << " anty= " << anty;
|
}
|
||||||
std::cout << "* (" << X.at(*index) << ", " << antx << ")=" << ((X.at(*index) + antx) / 2) << std::endl;
|
for (auto index = indices.begin(); index != indices.end(); ++index)
|
||||||
|
{
|
||||||
|
// Definition 2 Cut points are always on boundaries
|
||||||
|
if (y.at(*index) != yPrev && xPrev < X.at(*index))
|
||||||
|
{
|
||||||
|
cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider;
|
||||||
|
if (debug)
|
||||||
|
{
|
||||||
|
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
||||||
|
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
|
||||||
|
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" << ((X.at(*index) + xPrev) / 2) << std::endl;
|
||||||
}
|
}
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
}
|
}
|
||||||
antx = X.at(*index);
|
xPrev = X.at(*index);
|
||||||
anty = y.at(*index);
|
yPrev = y.at(*index);
|
||||||
}
|
}
|
||||||
return cutPts;
|
return cutPts;
|
||||||
}
|
}
|
||||||
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
|
std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
|
||||||
{
|
{
|
||||||
std::vector<size_t> idx(X.size());
|
std::vector<size_t> idx(X.size());
|
||||||
|
40
fimdlp/Metrics.cpp
Normal file
40
fimdlp/Metrics.cpp
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
#include "Metrics.h"
|
||||||
|
namespace CPPFImdlp
|
||||||
|
{
|
||||||
|
// Default constructor: the class declares no data members, so there is nothing to initialise.
Metrics::Metrics() = default;
|
||||||
|
/**
 * Shannon entropy (base 2) of the class labels stored in y[start, end).
 *
 * Each label is used directly as an index into a table of nClasses counters,
 * so every label in the range must lie in [0, nClasses).
 *
 * @param y        class labels
 * @param start    first index of the range (inclusive)
 * @param end      one past the last index of the range (exclusive)
 * @param nClasses number of per-class counters to allocate
 * @return the entropy of the range; 0 when the range is empty
 * @throws std::out_of_range if an index in [start, end) is not a valid
 *         position in y, or a label falls outside [0, nClasses)
 */
float Metrics::entropy(std::vector<int> &y, int start, int end, int nClasses)
{
    const int nElements = end - start;
    if (nElements <= 0)
    {
        // Empty range: no label contributes, and this avoids dividing by zero below.
        return 0;
    }

    std::vector<int> counts(nClasses, 0);
    for (int i = start; i < end; ++i)
    {
        // at() turns a bad index or an out-of-range label into an exception
        // instead of silently reading/writing outside the vectors.
        const std::vector<int>::size_type label =
            static_cast<std::vector<int>::size_type>(y.at(i));
        counts.at(label)++;
    }

    float result = 0;
    for (const int count : counts)
    {
        // Classes that do not occur are skipped: log2(0) is not finite.
        if (count > 0)
        {
            const float p = static_cast<float>(count) / nElements;
            result -= p * log2(p);
        }
    }
    return result;
}
|
||||||
|
/**
 * Number of distinct class labels present in y.
 *
 * Every label is compared against the labels already seen, so repeated
 * labels are counted once regardless of their order in y. The number of
 * distinct labels is expected to be small, which keeps the inner scan cheap.
 *
 * NOTE(review): a caller that sizes a per-label table with this value and
 * indexes it by label needs the labels to be exactly 0..k-1 — confirm
 * against the data fed to the discretizer.
 *
 * @param y class labels
 * @return the count of distinct values in y; 0 when y is empty
 */
int Metrics::numClasses(std::vector<int> &y)
{
    std::vector<int> seen;
    for (const int label : y)
    {
        bool known = false;
        for (const int previous : seen)
        {
            if (previous == label)
            {
                known = true;
                break;
            }
        }
        if (!known)
        {
            seen.push_back(label);
        }
    }
    return static_cast<int>(seen.size());
}
|
||||||
|
}
|
16
fimdlp/Metrics.h
Normal file
16
fimdlp/Metrics.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#ifndef METRICS_H
|
||||||
|
#define METRICS_H
|
||||||
|
#include <vector>
|
||||||
|
#include <Python.h>
|
||||||
|
#include <utility>
|
||||||
|
namespace CPPFImdlp
|
||||||
|
{
|
||||||
|
/// Helper class grouping label statistics; it declares no data members and
/// both helpers are static, so they can be called without an instance.
class Metrics
{
public:
    /// Default constructor.
    Metrics();

    /// Entropy of the labels in y[start, end), using nClasses per-class counters.
    static float entropy(std::vector<int> &y, int start, int end, int nClasses);

    /// Reports how many classes the label vector y is considered to contain.
    static int numClasses(std::vector<int> &y);
};
|
||||||
|
}
|
||||||
|
#endif
|
Binary file not shown.
@@ -95,10 +95,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
print("Cut points for each feature in Iris dataset:")
|
print("Cut points for each feature in Iris dataset:")
|
||||||
yz = self.y_.copy()
|
yz = self.y_.copy()
|
||||||
xz = X[:, 0].copy()
|
xz = X[:, 0].copy()
|
||||||
xzz = self.discretizer_.sort_vectors(xz, yz)
|
|
||||||
print("Xz: ", xz)
|
print("Xz: ", xz)
|
||||||
print("Yz: ", yz)
|
print("Yz: ", yz)
|
||||||
print("Xzz: ", xzz)
|
|
||||||
print("Solución:")
|
print("Solución:")
|
||||||
print("Xz*: ", np.sort(X[:, 0]))
|
print("Xz*: ", np.sort(X[:, 0]))
|
||||||
print("yz*: ", yz[np.argsort(X[:, 0])])
|
print("yz*: ", yz[np.argsort(X[:, 0])])
|
||||||
|
Reference in New Issue
Block a user