diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index 990cd6a..af89e5d 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -18,14 +18,16 @@ namespace CPPFImdlp std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) { std::vector cutPts; + std::vector cutIdx; float xPrev, cutPoint; - int yPrev; + int yPrev, idxPrev; std::vector indices = sortIndices(X); xPrev = X.at(indices[0]); yPrev = y.at(indices[0]); + idxPrev = indices[0]; if (debug) { - std::cout << "Entropy: " << Metrics::entropy(y, 0, y.size(), Metrics::numClasses(y)) << std::endl; + std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl; } for (auto index = indices.begin(); index != indices.end(); ++index) { @@ -37,12 +39,23 @@ namespace CPPFImdlp { std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev; - std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" << ((X.at(*index) + xPrev) / 2) << std::endl; + std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" + << ((X.at(*index) + xPrev) / 2) << "idxPrev" + << idxPrev << std::endl; } cutPts.push_back(cutPoint); + cutIdx.push_back(idxPrev); } xPrev = X.at(*index); yPrev = y.at(*index); + idxPrev = *index; + } + std::cout << "Information Gain:" << std::endl; + auto nc = Metrics::numClasses(y, indices, 0, indices.size()); + for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint) + { + std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl; + // << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl; } return cutPts; } diff --git a/fimdlp/Metrics.cpp b/fimdlp/Metrics.cpp index 81ec93c..682b8f7 100644 --- a/fimdlp/Metrics.cpp +++ b/fimdlp/Metrics.cpp @@ -4,15 +4,29 @@ namespace CPPFImdlp Metrics::Metrics() { } - float Metrics::entropy(std::vector &y, int start, int end, int nClasses) + int Metrics::numClasses(std::vector &y, std::vector indices, int start, int end) + { + int nClasses = 1; + int yAnt = y.at(start); + for (auto i = start; i < end; ++i) + { + if (y.at(i) != yAnt) + { + nClasses++; + yAnt = y.at(i); + } + } + return nClasses; + } + float Metrics::entropy(std::vector &y, std::vector &indices, int start, int end, int nClasses) { float entropy = 0; - int nElements = end - start; - std::vector - counts(nClasses, 0); - for (auto i = start; i < end; i++) + int nElements = 0; + std::vector counts(nClasses, 0); + for (auto i = &indices[start]; i != &indices[end]; ++i) { - counts[y[i]]++; + counts[y[*i]]++; + nElements++; } for (auto i = 0; i < nClasses; i++) { @@ -24,17 +38,20 @@ namespace CPPFImdlp } return entropy; } - int Metrics::numClasses(std::vector &y) + float Metrics::informationGain(std::vector &y, std::vector &indices, int start, int end, int cutPoint, int nClasses) { - int nClasses = 1; - int yAnt = y.at(0); - for (auto i = y.begin(); i != y.end(); ++i) - { - if (*i != yAnt) - { - nClasses++; - } - } - return nClasses; + float iGain = 0.0; + float entropy, entropyLeft, entropyRight; + int nClassesLeft, nClassesRight; + int nElementsLeft = cutPoint - start, nElementsRight = end - cutPoint; + int nElements = end - start; + nClassesLeft = Metrics::numClasses(y, indices, start, cutPoint); + nClassesRight = Metrics::numClasses(y, indices, cutPoint, end); + entropy = Metrics::entropy(y, indices, start, end, nClasses); + entropyLeft = Metrics::entropy(y, indices, start, cutPoint, nClassesLeft); + entropyRight = Metrics::entropy(y, indices, cutPoint, end, nClassesRight); + iGain = entropy - (float)nElementsLeft / nElements * entropyLeft - (float)nElementsRight / nElements * entropyRight; + return iGain; } + } diff --git a/fimdlp/Metrics.h b/fimdlp/Metrics.h index f4da032..60ef9a5 100644 --- a/fimdlp/Metrics.h +++ b/fimdlp/Metrics.h @@ -9,8 +9,9 @@ namespace CPPFImdlp { public: Metrics(); - static float entropy(std::vector &, int, int, int); - static int numClasses(std::vector &); + static int numClasses(std::vector &, std::vector, int, int); + static float entropy(std::vector &, std::vector &, int, int, int); + static float informationGain(std::vector &y, std::vector &indices, int start, int end, int cutPoint, int nClasses); }; } #endif \ No newline at end of file diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index cb7a931..97e36f9 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ