Add good_cut filter

This commit is contained in:
2022-12-07 01:26:38 +01:00
parent 7c25c33409
commit 7f4b09d2d6
3 changed files with 44 additions and 12 deletions

View File

@@ -46,9 +46,9 @@ namespace mdlp {
if (X.size() == 0 || y.size() == 0) {
throw invalid_argument("X and y must have at least one element");
}
this->indices = sortIndices(X_);
this->xDiscretized = labels(X.size(), -1);
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
indices = sortIndices(X_);
xDiscretized = labels(X.size(), -1);
numClasses = Metrics::numClasses(y, indices, 0, X.size());
if (proposal) {
computeCutPointsProposal();
@@ -168,9 +168,9 @@ namespace mdlp {
}
while (idx < numElements && xCur == xPivot);
// Check if the class changed and there are more than 1 element
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && goodCut(start, idx, numElements + 1)) {
// Must we add the entropy criteria here?
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
// if (totalEntropy - (entropyLeft + entropyRight) > 0) { Accept cut point }
cutPoint.start = start;
cutPoint.end = idx;
start = idx;
@@ -211,16 +211,17 @@ namespace mdlp {
int yPrev;
bool first = true;
// idxPrev is the index of the init instance of the cutPoint
size_t index, idxPrev = 0, idx = indices[0];
size_t index, idxPrev = 0, last, idx = indices[0];
xPrev = X[idx];
yPrev = y[idx];
for (index = 0; index < size_t(indices.size()) - 1; index++) {
last = indices.size() - 1;
for (index = 0; index < last; index++) {
idx = indices[index];
// Definition 2 Cut points are always on class boundaries &&
// there are more than 1 items in the interval
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
// if (entropy of interval) > (entropyLeft + entropyRight)) { Accept cut point } (goodCut)
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1 && goodCut(idxPrev, idx, last + 1)) {
// Must we add the entropy criteria here?
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
if (first) {
first = false;
cutPoint.fromValue = numeric_limits<float>::lowest();
@@ -253,6 +254,21 @@ namespace mdlp {
}
cutPoints = cutPts;
}
bool CPPFImdlp::goodCut(size_t start, size_t cut, size_t end)
{
    // Filter for a cut candidate located at `cut` inside the interval
    // delimited by `start` and `end`.
    // The entropy criterion is not active yet: the three entropies are
    // computed (and printed when `debug` is set), but every candidate is
    // accepted.
    //
    // TODO: store the entropies in a sparse square matrix (samples, samples)
    // M[start, end] initialised to -1; when a value has not been computed
    // yet, compute it and store it.
    const float leftEntropy = Metrics::entropy(y, indices, start, cut, numClasses);
    const float rightEntropy = Metrics::entropy(y, indices, cut, end, numClasses);
    const float wholeEntropy = Metrics::entropy(y, indices, start, end, numClasses);
    if (debug) {
        printf("Entropy L, R, T: L(%5.3g) + R(%5.3g) - T(%5.3g) \t", leftEntropy, rightEntropy, wholeEntropy);
    }
    // Candidate criterion, currently disabled:
    //   return (wholeEntropy - (leftEntropy + rightEntropy) > 0);
    return true;
}
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices(samples& X_)
{

View File

@@ -22,6 +22,7 @@ namespace mdlp {
void computeCutPointsProposal();
bool evaluateCutPoint(cutPoint_t, cutPoint_t);
void filterCutPoints();
bool goodCut(size_t, size_t, size_t); // whether the cut candidate reduces entropy; arguments are (start, cut, end)
public:
CPPFImdlp();

View File

@@ -19,19 +19,34 @@ int main()
// Read the Data from the file
// as String Vector
size_t col;
vector<string> row;
string line, word;
vector<vector<float>> dataset = vector<vector<float>>(15, vector<float>());
while (getline(fin, line)) {
if (count++ > 215) {
row.clear();
stringstream ss(line);
col = 0;
while (getline(ss, word, ',')) {
row.push_back(word);
cout << word << " ";
col = col % 15;
dataset[col].push_back(stof(word));
cout << col << "-" << word << " ";
col++;
}
cout << endl;
}
}
labels y = labels(dataset[0].begin(), dataset[0].end());
cout << "Column 0 (y): " << y.size() << endl;
for (auto item : y) {
cout << item << " ";
}
CPPFImdlp test = CPPFImdlp(false, 6, true);
test.fit(dataset[3], y);
cout << "Cut points: " << test.getCutPoints().size() << endl;
for (auto item : test.getCutPoints()) {
cout << item << " ";
}
fin.close();
return 0;
}