mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 08:25:51 +00:00
Fix entropy and information gain
This commit is contained in:
@@ -18,14 +18,16 @@ namespace CPPFImdlp
|
|||||||
std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
|
std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
|
||||||
{
|
{
|
||||||
std::vector<float> cutPts;
|
std::vector<float> cutPts;
|
||||||
|
std::vector<int> cutIdx;
|
||||||
float xPrev, cutPoint;
|
float xPrev, cutPoint;
|
||||||
int yPrev;
|
int yPrev, idxPrev;
|
||||||
std::vector<size_t> indices = sortIndices(X);
|
std::vector<size_t> indices = sortIndices(X);
|
||||||
xPrev = X.at(indices[0]);
|
xPrev = X.at(indices[0]);
|
||||||
yPrev = y.at(indices[0]);
|
yPrev = y.at(indices[0]);
|
||||||
|
idxPrev = indices[0];
|
||||||
if (debug)
|
if (debug)
|
||||||
{
|
{
|
||||||
std::cout << "Entropy: " << Metrics::entropy(y, 0, y.size(), Metrics::numClasses(y)) << std::endl;
|
std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl;
|
||||||
}
|
}
|
||||||
for (auto index = indices.begin(); index != indices.end(); ++index)
|
for (auto index = indices.begin(); index != indices.end(); ++index)
|
||||||
{
|
{
|
||||||
@@ -37,12 +39,23 @@ namespace CPPFImdlp
|
|||||||
{
|
{
|
||||||
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
|
||||||
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
|
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
|
||||||
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" << ((X.at(*index) + xPrev) / 2) << std::endl;
|
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")="
|
||||||
|
<< ((X.at(*index) + xPrev) / 2) << "idxPrev"
|
||||||
|
<< idxPrev << std::endl;
|
||||||
}
|
}
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
|
cutIdx.push_back(idxPrev);
|
||||||
}
|
}
|
||||||
xPrev = X.at(*index);
|
xPrev = X.at(*index);
|
||||||
yPrev = y.at(*index);
|
yPrev = y.at(*index);
|
||||||
|
idxPrev = *index;
|
||||||
|
}
|
||||||
|
std::cout << "Information Gain:" << std::endl;
|
||||||
|
auto nc = Metrics::numClasses(y, indices, 0, indices.size());
|
||||||
|
for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint)
|
||||||
|
{
|
||||||
|
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
|
||||||
|
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
|
||||||
}
|
}
|
||||||
return cutPts;
|
return cutPts;
|
||||||
}
|
}
|
||||||
|
@@ -4,15 +4,29 @@ namespace CPPFImdlp
|
|||||||
Metrics::Metrics()
|
Metrics::Metrics()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
float Metrics::entropy(std::vector<int> &y, int start, int end, int nClasses)
|
/**
 * Count the distinct class labels among the samples selected by
 * positions [start, end) of `indices`.
 *
 * Labels are read as y[indices[i]], so the result describes the same
 * subset that entropy() walks for the same (indices, start, end).
 *
 * @param y        class label of every sample
 * @param indices  sample ordering; `start`/`end` are positions in it
 *                 (taken by value only to stay compatible with the
 *                 existing declaration)
 * @param start    first position of the range (inclusive)
 * @param end      last position of the range (exclusive)
 * @return number of different labels in the range; 0 for an empty range
 * @throws std::out_of_range if a position or a stored sample index is
 *         outside its vector
 *
 * NOTE(review): entropy() sizes its per-label table with the value it is
 * given as nClasses and indexes it by label, so for a subset that lacks
 * the low labels a distinct count can be smaller than (largest label + 1).
 * Confirm what callers need before feeding this result to entropy().
 */
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, int start, int end)
{
    // Labels met so far. The number of classes is small, so a linear
    // membership scan is enough and needs no extra headers.
    std::vector<int> seen;
    for (auto i = start; i < end; ++i)
    {
        const int label = y.at(indices.at(i));
        bool known = false;
        for (const int candidate : seen)
        {
            if (candidate == label)
            {
                known = true;
                break;
            }
        }
        if (!known)
        {
            seen.push_back(label);
        }
    }
    return static_cast<int>(seen.size());
}
|
||||||
|
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int nClasses)
|
||||||
{
|
{
|
||||||
float entropy = 0;
|
float entropy = 0;
|
||||||
int nElements = end - start;
|
int nElements = 0;
|
||||||
std::vector<int>
|
std::vector<int> counts(nClasses, 0);
|
||||||
counts(nClasses, 0);
|
for (auto i = &indices[start]; i != &indices[end]; ++i)
|
||||||
for (auto i = start; i < end; i++)
|
|
||||||
{
|
{
|
||||||
counts[y[i]]++;
|
counts[y[*i]]++;
|
||||||
|
nElements++;
|
||||||
}
|
}
|
||||||
for (auto i = 0; i < nClasses; i++)
|
for (auto i = 0; i < nClasses; i++)
|
||||||
{
|
{
|
||||||
@@ -24,17 +38,20 @@ namespace CPPFImdlp
|
|||||||
}
|
}
|
||||||
return entropy;
|
return entropy;
|
||||||
}
|
}
|
||||||
/**
 * Information gain obtained by splitting the samples at positions
 * [start, end) of `indices` into [start, cutPoint) and [cutPoint, end):
 *
 *   gain = H(all) - |left| / |all| * H(left) - |right| / |all| * H(right)
 *
 * @param y         class label of every sample
 * @param indices   sample ordering; every position below refers to it
 * @param start     first position of the range (inclusive)
 * @param end       last position of the range (exclusive)
 * @param cutPoint  position in `indices` where the range is split
 *                  (a position, not a sample index stored in `indices`)
 * @param nClasses  class count forwarded to entropy() for the whole range
 * @return the gain; 0 when the range is empty or the cut leaves one side
 *         without samples
 *
 * NOTE(review): the debug loop in cutPoints() seems to pass values taken
 * from `indices` as cutPoint rather than positions — confirm the caller.
 */
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses)
{
    const int nElements = end - start;
    const int nElementsLeft = cutPoint - start;
    const int nElementsRight = end - cutPoint;

    // A cut at or beyond a border separates nothing: one side is empty and
    // the other is the whole range, so the gain is zero. Returning early
    // also avoids dividing by zero and probing empty sub-ranges.
    if (nElements <= 0 || nElementsLeft <= 0 || nElementsRight <= 0)
    {
        return 0.0f;
    }

    const int nClassesLeft = Metrics::numClasses(y, indices, start, cutPoint);
    const int nClassesRight = Metrics::numClasses(y, indices, cutPoint, end);

    const float entropy = Metrics::entropy(y, indices, start, end, nClasses);
    const float entropyLeft = Metrics::entropy(y, indices, start, cutPoint, nClassesLeft);
    const float entropyRight = Metrics::entropy(y, indices, cutPoint, end, nClassesRight);

    // Each side's entropy is weighted by its share of the samples.
    return entropy - static_cast<float>(nElementsLeft) / nElements * entropyLeft - static_cast<float>(nElementsRight) / nElements * entropyRight;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -9,8 +9,9 @@ namespace CPPFImdlp
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Metrics();
|
Metrics();
|
||||||
static float entropy(std::vector<int> &, int, int, int);
|
static int numClasses(std::vector<int> &, std::vector<size_t>, int, int);
|
||||||
static int numClasses(std::vector<int> &);
|
static float entropy(std::vector<int> &, std::vector<size_t> &, int, int, int);
|
||||||
|
static float informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
Binary file not shown.
Reference in New Issue
Block a user