mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 23:45:57 +00:00
65 lines
2.2 KiB
C++
65 lines
2.2 KiB
C++
#include "Metrics.h"
|
|
#include <set>
|
|
#include <cmath>
|
|
using namespace std;
|
|
namespace mdlp {
|
|
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
|
|
{
|
|
}
|
|
int Metrics::computeNumClasses(size_t start, size_t end)
|
|
{
|
|
set<int> nClasses;
|
|
for (auto i = start; i < end; ++i) {
|
|
nClasses.insert(y[indices[i]]);
|
|
}
|
|
return nClasses.size();
|
|
}
|
|
void Metrics::setData(labels_t& y_, indices_t& indices_)
|
|
{
|
|
indices = indices_;
|
|
y = y_;
|
|
numClasses = computeNumClasses(0, indices.size());
|
|
entropyCache.clear();
|
|
igCache.clear();
|
|
}
|
|
precision_t Metrics::entropy(size_t start, size_t end)
|
|
{
|
|
precision_t p, ventropy = 0;
|
|
int nElements = 0;
|
|
labels_t counts(numClasses + 1, 0);
|
|
if (end - start < 2)
|
|
return 0;
|
|
if (entropyCache.find({ start, end }) != entropyCache.end()) {
|
|
return entropyCache[{start, end}];
|
|
}
|
|
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
|
counts[y[*i]]++;
|
|
nElements++;
|
|
}
|
|
for (auto count : counts) {
|
|
if (count > 0) {
|
|
p = (precision_t)count / nElements;
|
|
ventropy -= p * log2(p);
|
|
}
|
|
}
|
|
entropyCache[{start, end}] = ventropy;
|
|
return ventropy;
|
|
}
|
|
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
|
{
|
|
precision_t iGain;
|
|
precision_t entropyInterval, entropyLeft, entropyRight;
|
|
int nElementsLeft = cut - start, nElementsRight = end - cut;
|
|
int nElements = end - start;
|
|
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
|
return igCache[make_tuple(start, cut, end)];
|
|
}
|
|
entropyInterval = entropy(start, end);
|
|
entropyLeft = entropy(start, cut);
|
|
entropyRight = entropy(cut, end);
|
|
iGain = entropyInterval - ((precision_t)nElementsLeft * entropyLeft + (precision_t)nElementsRight * entropyRight) / nElements;
|
|
igCache[make_tuple(start, cut, end)] = iGain;
|
|
return iGain;
|
|
}
|
|
|
|
} |