Fix entropy and information gain

This commit is contained in:
2022-11-28 11:39:11 +01:00
parent 3d27c4c3b7
commit e9e2a66203
4 changed files with 53 additions and 22 deletions

View File

@@ -18,14 +18,16 @@ namespace CPPFImdlp
std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y) std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
{ {
std::vector<float> cutPts; std::vector<float> cutPts;
std::vector<int> cutIdx;
float xPrev, cutPoint; float xPrev, cutPoint;
int yPrev; int yPrev, idxPrev;
std::vector<size_t> indices = sortIndices(X); std::vector<size_t> indices = sortIndices(X);
xPrev = X.at(indices[0]); xPrev = X.at(indices[0]);
yPrev = y.at(indices[0]); yPrev = y.at(indices[0]);
idxPrev = indices[0];
if (debug) if (debug)
{ {
std::cout << "Entropy: " << Metrics::entropy(y, 0, y.size(), Metrics::numClasses(y)) << std::endl; std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl;
} }
for (auto index = indices.begin(); index != indices.end(); ++index) for (auto index = indices.begin(); index != indices.end(); ++index)
{ {
@@ -37,12 +39,23 @@ namespace CPPFImdlp
{ {
std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev; std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" << ((X.at(*index) + xPrev) / 2) << std::endl; std::cout << "* (" << X.at(*index) << ", " << xPrev << ")="
<< ((X.at(*index) + xPrev) / 2) << "idxPrev"
<< idxPrev << std::endl;
} }
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
cutIdx.push_back(idxPrev);
} }
xPrev = X.at(*index); xPrev = X.at(*index);
yPrev = y.at(*index); yPrev = y.at(*index);
idxPrev = *index;
}
std::cout << "Information Gain:" << std::endl;
auto nc = Metrics::numClasses(y, indices, 0, indices.size());
for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint)
{
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
} }
return cutPts; return cutPts;
} }

View File

@@ -4,15 +4,29 @@ namespace CPPFImdlp
// Metrics only exposes static helpers in this file, so construction has
// nothing to initialise.
Metrics::Metrics() = default;
/**
 * Count the distinct class labels found in a window of `indices`.
 *
 * The window is the half-open range [start, end) of positions in
 * `indices`; every position is mapped through `indices` to a label in `y`,
 * which is the same access pattern Metrics::entropy uses (y[indices[i]]).
 *
 * @param y        class label of every sample
 * @param indices  sample positions to look through (taken by value to keep
 *                 the signature identical to the declaration in the header)
 * @param start    first position of the window (inclusive)
 * @param end      end of the window (exclusive)
 * @return number of different labels in the window; 0 when it is empty
 * @throws std::out_of_range if a window position or a mapped sample index
 *         falls outside its vector
 */
int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, int start, int end)
{
    // Labels already met. A linear scan is used instead of std::set so that
    // no additional header is required; the number of classes is expected
    // to be small.
    std::vector<int> seen;
    for (int pos = start; pos < end; ++pos)
    {
        const int label = y.at(indices.at(pos));
        bool known = false;
        for (const int other : seen)
        {
            if (other == label)
            {
                known = true;
                break;
            }
        }
        if (!known)
        {
            seen.push_back(label);
        }
    }
    return static_cast<int>(seen.size());
}
float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int nClasses)
{ {
float entropy = 0; float entropy = 0;
int nElements = end - start; int nElements = 0;
std::vector<int> std::vector<int> counts(nClasses, 0);
counts(nClasses, 0); for (auto i = &indices[start]; i != &indices[end]; ++i)
for (auto i = start; i < end; i++)
{ {
counts[y[i]]++; counts[y[*i]]++;
nElements++;
} }
for (auto i = 0; i < nClasses; i++) for (auto i = 0; i < nClasses; i++)
{ {
@@ -24,17 +38,20 @@ namespace CPPFImdlp
} }
return entropy; return entropy;
} }
/**
 * Information gain obtained by splitting a window of `indices` in two.
 *
 *   gain = H(window) - |left| / |window| * H(left)
 *                    - |right| / |window| * H(right)
 *
 * with window = [start, end), left = [start, cutPoint) and
 * right = [cutPoint, end); all three are ranges of positions in `indices`.
 *
 * @param y         class label of every sample
 * @param indices   sample positions; labels are read as y[indices[i]]
 * @param start     first position of the window (inclusive)
 * @param end       end of the window (exclusive)
 * @param cutPoint  position in `indices` where the window is split; it is
 *                  used as the end of the left part and the start of the
 *                  right part, so it must lie in [start, end]
 * @param nClasses  size of the label space, forwarded to Metrics::entropy
 * @return the gain, or 0 when the window is empty or `cutPoint` lies
 *         outside it
 */
float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses)
{
    const int nElements = end - start;
    // An empty window would divide by zero below, and a cut outside the
    // window would hand Metrics::entropy a reversed range.
    if (nElements <= 0 || cutPoint < start || cutPoint > end)
    {
        return 0.0f;
    }
    const int nElementsLeft = cutPoint - start;
    const int nElementsRight = end - cutPoint;
    // Metrics::entropy allocates `nClasses` counters and indexes them by
    // label value, so every call needs the size of the whole label space;
    // a per-side class count can be smaller than a label found on that side.
    // NOTE(review): assumes entropy() skips counters that stay at zero - confirm.
    const float entropyAll = Metrics::entropy(y, indices, start, end, nClasses);
    const float entropyLeft = Metrics::entropy(y, indices, start, cutPoint, nClasses);
    const float entropyRight = Metrics::entropy(y, indices, cutPoint, end, nClasses);
    return entropyAll
           - static_cast<float>(nElementsLeft) / nElements * entropyLeft
           - static_cast<float>(nElementsRight) / nElements * entropyRight;
}
} }

View File

@@ -9,8 +9,9 @@ namespace CPPFImdlp
{ {
public: public:
Metrics(); Metrics();
static float entropy(std::vector<int> &, int, int, int); static int numClasses(std::vector<int> &, std::vector<size_t>, int, int);
static int numClasses(std::vector<int> &); static float entropy(std::vector<int> &, std::vector<size_t> &, int, int, int);
static float informationGain(std::vector<int> &y, std::vector<size_t> &indices, int start, int end, int cutPoint, int nClasses);
}; };
} }
#endif #endif