mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
Add good_cut filter
This commit is contained in:
@@ -46,9 +46,9 @@ namespace mdlp {
|
||||
if (X.size() == 0 || y.size() == 0) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
this->indices = sortIndices(X_);
|
||||
this->xDiscretized = labels(X.size(), -1);
|
||||
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
|
||||
indices = sortIndices(X_);
|
||||
xDiscretized = labels(X.size(), -1);
|
||||
numClasses = Metrics::numClasses(y, indices, 0, X.size());
|
||||
|
||||
if (proposal) {
|
||||
computeCutPointsProposal();
|
||||
@@ -168,9 +168,9 @@ namespace mdlp {
|
||||
}
|
||||
while (idx < numElements && xCur == xPivot);
|
||||
// Check if the class changed and there are more than 1 element
|
||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
|
||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && goodCut(start, idx, numElements + 1)) {
|
||||
// Must we add the entropy criteria here?
|
||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||
// if (totalEntropy - (entropyLeft + entropyRight) > 0) { Accept cut point }
|
||||
cutPoint.start = start;
|
||||
cutPoint.end = idx;
|
||||
start = idx;
|
||||
@@ -211,16 +211,17 @@ namespace mdlp {
|
||||
int yPrev;
|
||||
bool first = true;
|
||||
// idxPrev is the index of the init instance of the cutPoint
|
||||
size_t index, idxPrev = 0, idx = indices[0];
|
||||
size_t index, idxPrev = 0, last, idx = indices[0];
|
||||
xPrev = X[idx];
|
||||
yPrev = y[idx];
|
||||
for (index = 0; index < size_t(indices.size()) - 1; index++) {
|
||||
last = indices.size() - 1;
|
||||
for (index = 0; index < last; index++) {
|
||||
idx = indices[index];
|
||||
// Definition 2 Cut points are always on class boundaries &&
|
||||
// there are more than 1 items in the interval
|
||||
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
|
||||
// if (entropy of interval) > (entropyLeft + entropyRight)) { Accept cut point } (goodCut)
|
||||
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1 && goodCut(idxPrev, idx, last + 1)) {
|
||||
// Must we add the entropy criteria here?
|
||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||
if (first) {
|
||||
first = false;
|
||||
cutPoint.fromValue = numeric_limits<float>::lowest();
|
||||
@@ -253,6 +254,21 @@ namespace mdlp {
|
||||
}
|
||||
cutPoints = cutPts;
|
||||
}
|
||||
bool CPPFImdlp::goodCut(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
/*
|
||||
Meter las entropías en una matríz cuadrada dispersa (samples, samples) M[start, end] iniciada a -1 y si no se ha calculado calcularla y almacenarla
|
||||
|
||||
|
||||
*/
|
||||
float entropyLeft = Metrics::entropy(y, indices, start, cut, numClasses);
|
||||
float entropyRight = Metrics::entropy(y, indices, cut, end, numClasses);
|
||||
float entropyInterval = Metrics::entropy(y, indices, start, end, numClasses);
|
||||
if (debug)
|
||||
printf("Entropy L, R, T: L(%5.3g) + R(%5.3g) - T(%5.3g) \t", entropyLeft, entropyRight, entropyInterval);
|
||||
//return (entropyInterval - (entropyLeft + entropyRight) > 0);
|
||||
return true;
|
||||
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples& X_)
|
||||
{
|
||||
|
@@ -22,6 +22,7 @@ namespace mdlp {
|
||||
void computeCutPointsProposal();
|
||||
bool evaluateCutPoint(cutPoint_t, cutPoint_t);
|
||||
void filterCutPoints();
|
||||
bool goodCut(size_t, size_t, size_t); // if the cut candidate reduces entropy
|
||||
|
||||
public:
|
||||
CPPFImdlp();
|
||||
|
@@ -19,19 +19,34 @@ int main()
|
||||
|
||||
// Read the Data from the file
|
||||
// as String Vector
|
||||
size_t col;
|
||||
vector<string> row;
|
||||
string line, word;
|
||||
vector<vector<float>> dataset = vector<vector<float>>(15, vector<float>());
|
||||
while (getline(fin, line)) {
|
||||
if (count++ > 215) {
|
||||
row.clear();
|
||||
stringstream ss(line);
|
||||
col = 0;
|
||||
while (getline(ss, word, ',')) {
|
||||
row.push_back(word);
|
||||
cout << word << " ";
|
||||
col = col % 15;
|
||||
dataset[col].push_back(stof(word));
|
||||
cout << col << "-" << word << " ";
|
||||
col++;
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
labels y = labels(dataset[0].begin(), dataset[0].end());
|
||||
cout << "Column 0 (y): " << y.size() << endl;
|
||||
for (auto item : y) {
|
||||
cout << item << " ";
|
||||
}
|
||||
CPPFImdlp test = CPPFImdlp(false, 6, true);
|
||||
test.fit(dataset[3], y);
|
||||
cout << "Cut points: " << test.getCutPoints().size() << endl;
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << " ";
|
||||
}
|
||||
fin.close();
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user