mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 08:25:51 +00:00
Refactor base proposal
This commit is contained in:
@@ -27,82 +27,38 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
indices = sortIndices(X_);
|
indices = sortIndices(X_);
|
||||||
metrics.setData(y, indices);
|
metrics.setData(y, indices);
|
||||||
computeCutPointsRecursive(0, X.size());
|
computeCutPoints(0, X.size());
|
||||||
//simulateCutPointsRecursive();
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::simulateCutPointsRecursive()
|
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
cutPoints_t jobs = cutPoints_t();
|
int cut;
|
||||||
jobs.push_back(cutPoint_t({ 0, X.size() }));
|
|
||||||
while (jobs.size() > 0) {
|
|
||||||
auto interval = jobs.back();
|
|
||||||
jobs.pop_back();
|
|
||||||
//cout << "start: " << interval.start << " end: " << interval.end << endl;
|
|
||||||
auto cut = getCandidateSimulate(interval.start, interval.end);
|
|
||||||
if (cut == -1 || !mdlp(interval.start, cut, interval.end)) {
|
|
||||||
if (interval.start != 0)
|
|
||||||
xCutPoints.push_back(xcutPoint_t({ interval.start, (X[indices[interval.start]] + X[indices[interval.start - 1]]) / 2 }));
|
|
||||||
if (interval.end != X.size())
|
|
||||||
xCutPoints.push_back(xcutPoint_t({ interval.end, (X[indices[interval.end]] + X[indices[interval.end - 1]]) / 2 }));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
jobs.push_back(cutPoint_t({ interval.start, size_t(cut) }));
|
|
||||||
jobs.push_back(cutPoint_t({ size_t(cut), interval.end }));
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void CPPFImdlp::computeCutPointsRecursive(size_t start, size_t end)
|
|
||||||
{
|
|
||||||
xcutPoint_t cut;
|
|
||||||
//cout << "start: " << start << " end: " << end << endl;
|
|
||||||
if (end - start < 2)
|
if (end - start < 2)
|
||||||
return;
|
return;
|
||||||
cut = getCandidate(start, end);
|
cut = getCandidate(start, end);
|
||||||
if (cut.value == -1 || !mdlp(start, cut.index, end)) {
|
if (cut == -1 || !mdlp(start, cut, end)) {
|
||||||
// cut.value == -1 means that there is no candidate in the interval
|
// cut == -1 means that there is no candidate in the interval
|
||||||
// that enhances the information gain
|
// that enhances the information gain
|
||||||
//cout << "¡Ding! " << cut.value << " " << cut.index << endl;
|
|
||||||
if (start != 0)
|
if (start != 0)
|
||||||
xCutPoints.push_back(xcutPoint_t({ start, (X[indices[start]] + X[indices[start - 1]]) / 2 }));
|
xCutPoints.push_back(xcutPoint_t({ start, (X[indices[start]] + X[indices[start - 1]]) / 2 }));
|
||||||
if (end != X.size())
|
if (end != X.size())
|
||||||
xCutPoints.push_back(xcutPoint_t({ end, (X[indices[end]] + X[indices[end - 1]]) / 2 }));
|
xCutPoints.push_back(xcutPoint_t({ end, (X[indices[end]] + X[indices[end - 1]]) / 2 }));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
computeCutPointsRecursive(start, cut.index);
|
computeCutPoints(start, cut);
|
||||||
computeCutPointsRecursive(cut.index, end);
|
computeCutPoints(cut, end);
|
||||||
}
|
}
|
||||||
xcutPoint_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
long int CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
xcutPoint_t candidate;
|
long int candidate = -1, elements = end - start;
|
||||||
int elements = end - start;
|
|
||||||
candidate.value = -1;
|
|
||||||
candidate.index = -1;
|
|
||||||
float entropy_left, entropy_right, minEntropy = numeric_limits<float>::max();
|
float entropy_left, entropy_right, minEntropy = numeric_limits<float>::max();
|
||||||
for (auto idx = start + 1; idx < end; idx++) {
|
for (auto idx = start + 1; idx < end; idx++) {
|
||||||
|
// Cutpoints are always on boundaries
|
||||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||||
continue;
|
continue;
|
||||||
entropy_left = float(idx - start) / elements * metrics.entropy(start, idx);
|
entropy_left = float(idx - start) / elements * metrics.entropy(start, idx);
|
||||||
entropy_right = float(end - idx) / elements * metrics.entropy(idx, end);
|
entropy_right = float(end - idx) / elements * metrics.entropy(idx, end);
|
||||||
if (entropy_left + entropy_right < minEntropy) {
|
if (entropy_left + entropy_right < minEntropy) {
|
||||||
minEntropy = entropy_left + entropy_right;
|
|
||||||
candidate.value = (X[indices[idx]] + X[indices[idx - 1]]) / 2;
|
|
||||||
candidate.index = idx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return candidate;
|
|
||||||
}
|
|
||||||
int CPPFImdlp::getCandidateSimulate(size_t start, size_t end)
|
|
||||||
{
|
|
||||||
int candidate = -1;
|
|
||||||
int elements = end - start;
|
|
||||||
float entropy_left, entropy_right, minEntropy = numeric_limits<float>::max();
|
|
||||||
for (auto idx = start + 1; idx < end; idx++) {
|
|
||||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
|
||||||
continue;
|
|
||||||
entropy_left = float(idx - start) / elements * metrics.entropy(start, idx);
|
|
||||||
entropy_right = float(end - idx) / elements * metrics.entropy(idx, end);
|
|
||||||
if (minEntropy > entropy_left + entropy_right) {
|
|
||||||
minEntropy = entropy_left + entropy_right;
|
minEntropy = entropy_left + entropy_right;
|
||||||
candidate = idx;
|
candidate = idx;
|
||||||
}
|
}
|
||||||
@@ -127,11 +83,6 @@ namespace mdlp {
|
|||||||
ig = metrics.informationGain(start, cut, end);
|
ig = metrics.informationGain(start, cut, end);
|
||||||
delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2);
|
delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2);
|
||||||
float term = 1 / N * (log2(N - 1) + delta);
|
float term = 1 / N * (log2(N - 1) + delta);
|
||||||
if (debug) {
|
|
||||||
cout << "start: " << start << " cut: " << cut << " end: " << end << endl;
|
|
||||||
cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << endl;
|
|
||||||
cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << endl;
|
|
||||||
}
|
|
||||||
return ig > term;
|
return ig > term;
|
||||||
}
|
}
|
||||||
samples CPPFImdlp::getCutPointsx()
|
samples CPPFImdlp::getCutPointsx()
|
||||||
|
@@ -17,17 +17,14 @@ namespace mdlp {
|
|||||||
xcutPoints_t xCutPoints;
|
xcutPoints_t xCutPoints;
|
||||||
|
|
||||||
static indices_t sortIndices(samples&);
|
static indices_t sortIndices(samples&);
|
||||||
void computeCutPointsRecursive(size_t, size_t);
|
void computeCutPoints(size_t, size_t);
|
||||||
xcutPoint_t getCandidate(size_t, size_t);
|
long int getCandidate(size_t, size_t);
|
||||||
bool mdlp(size_t, size_t, size_t);
|
bool mdlp(size_t, size_t, size_t);
|
||||||
void simulateCutPointsRecursive();
|
|
||||||
int getCandidateSimulate(size_t, size_t);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CPPFImdlp();
|
CPPFImdlp();
|
||||||
CPPFImdlp(bool, int, bool debug = false);
|
CPPFImdlp(bool, int, bool debug = false);
|
||||||
~CPPFImdlp();
|
~CPPFImdlp();
|
||||||
indices_t getIndices();
|
|
||||||
CPPFImdlp& fitx(samples&, labels&);
|
CPPFImdlp& fitx(samples&, labels&);
|
||||||
samples getCutPointsx();
|
samples getCutPointsx();
|
||||||
};
|
};
|
||||||
|
Binary file not shown.
Reference in New Issue
Block a user