mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Add good_cut filter
This commit is contained in:
@@ -46,9 +46,9 @@ namespace mdlp {
|
|||||||
if (X.size() == 0 || y.size() == 0) {
|
if (X.size() == 0 || y.size() == 0) {
|
||||||
throw invalid_argument("X and y must have at least one element");
|
throw invalid_argument("X and y must have at least one element");
|
||||||
}
|
}
|
||||||
this->indices = sortIndices(X_);
|
indices = sortIndices(X_);
|
||||||
this->xDiscretized = labels(X.size(), -1);
|
xDiscretized = labels(X.size(), -1);
|
||||||
this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
|
numClasses = Metrics::numClasses(y, indices, 0, X.size());
|
||||||
|
|
||||||
if (proposal) {
|
if (proposal) {
|
||||||
computeCutPointsProposal();
|
computeCutPointsProposal();
|
||||||
@@ -168,9 +168,9 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
while (idx < numElements && xCur == xPivot);
|
while (idx < numElements && xCur == xPivot);
|
||||||
// Check if the class changed and there is more than one element
|
// Check if the class changed and there is more than one element
|
||||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
|
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && goodCut(start, idx, numElements + 1)) {
|
||||||
// Must we add the entropy criteria here?
|
// Must we add the entropy criteria here?
|
||||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
// if (totalEntropy - (entropyLeft + entropyRight) > 0) { Accept cut point }
|
||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = idx;
|
cutPoint.end = idx;
|
||||||
start = idx;
|
start = idx;
|
||||||
@@ -211,16 +211,17 @@ namespace mdlp {
|
|||||||
int yPrev;
|
int yPrev;
|
||||||
bool first = true;
|
bool first = true;
|
||||||
// idxPrev is the index of the init instance of the cutPoint
|
// idxPrev is the index of the init instance of the cutPoint
|
||||||
size_t index, idxPrev = 0, idx = indices[0];
|
size_t index, idxPrev = 0, last, idx = indices[0];
|
||||||
xPrev = X[idx];
|
xPrev = X[idx];
|
||||||
yPrev = y[idx];
|
yPrev = y[idx];
|
||||||
for (index = 0; index < size_t(indices.size()) - 1; index++) {
|
last = indices.size() - 1;
|
||||||
|
for (index = 0; index < last; index++) {
|
||||||
idx = indices[index];
|
idx = indices[index];
|
||||||
// Definition 2 Cut points are always on class boundaries &&
|
// Definition 2 Cut points are always on class boundaries &&
|
||||||
// there is more than one item in the interval
|
// there is more than one item in the interval
|
||||||
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
|
// if (entropy of interval) > (entropyLeft + entropyRight)) { Accept cut point } (goodCut)
|
||||||
|
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1 && goodCut(idxPrev, idx, last + 1)) {
|
||||||
// Must we add the entropy criteria here?
|
// Must we add the entropy criteria here?
|
||||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
|
||||||
if (first) {
|
if (first) {
|
||||||
first = false;
|
first = false;
|
||||||
cutPoint.fromValue = numeric_limits<float>::lowest();
|
cutPoint.fromValue = numeric_limits<float>::lowest();
|
||||||
@@ -253,6 +254,21 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
|
bool CPPFImdlp::goodCut(size_t start, size_t cut, size_t end)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Meter las entropías en una matríz cuadrada dispersa (samples, samples) M[start, end] iniciada a -1 y si no se ha calculado calcularla y almacenarla
|
||||||
|
|
||||||
|
|
||||||
|
*/
|
||||||
|
float entropyLeft = Metrics::entropy(y, indices, start, cut, numClasses);
|
||||||
|
float entropyRight = Metrics::entropy(y, indices, cut, end, numClasses);
|
||||||
|
float entropyInterval = Metrics::entropy(y, indices, start, end, numClasses);
|
||||||
|
if (debug)
|
||||||
|
printf("Entropy L, R, T: L(%5.3g) + R(%5.3g) - T(%5.3g) \t", entropyLeft, entropyRight, entropyInterval);
|
||||||
|
//return (entropyInterval - (entropyLeft + entropyRight) > 0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
indices_t CPPFImdlp::sortIndices(samples& X_)
|
indices_t CPPFImdlp::sortIndices(samples& X_)
|
||||||
{
|
{
|
||||||
|
@@ -22,6 +22,7 @@ namespace mdlp {
|
|||||||
void computeCutPointsProposal();
|
void computeCutPointsProposal();
|
||||||
bool evaluateCutPoint(cutPoint_t, cutPoint_t);
|
bool evaluateCutPoint(cutPoint_t, cutPoint_t);
|
||||||
void filterCutPoints();
|
void filterCutPoints();
|
||||||
|
bool goodCut(size_t, size_t, size_t); // if the cut candidate reduces entropy
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CPPFImdlp();
|
CPPFImdlp();
|
||||||
|
@@ -19,19 +19,34 @@ int main()
|
|||||||
|
|
||||||
// Read the Data from the file
|
// Read the Data from the file
|
||||||
// as String Vector
|
// as String Vector
|
||||||
|
size_t col;
|
||||||
vector<string> row;
|
vector<string> row;
|
||||||
string line, word;
|
string line, word;
|
||||||
|
vector<vector<float>> dataset = vector<vector<float>>(15, vector<float>());
|
||||||
while (getline(fin, line)) {
|
while (getline(fin, line)) {
|
||||||
if (count++ > 215) {
|
if (count++ > 215) {
|
||||||
row.clear();
|
|
||||||
stringstream ss(line);
|
stringstream ss(line);
|
||||||
|
col = 0;
|
||||||
while (getline(ss, word, ',')) {
|
while (getline(ss, word, ',')) {
|
||||||
row.push_back(word);
|
col = col % 15;
|
||||||
cout << word << " ";
|
dataset[col].push_back(stof(word));
|
||||||
|
cout << col << "-" << word << " ";
|
||||||
|
col++;
|
||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
labels y = labels(dataset[0].begin(), dataset[0].end());
|
||||||
|
cout << "Column 0 (y): " << y.size() << endl;
|
||||||
|
for (auto item : y) {
|
||||||
|
cout << item << " ";
|
||||||
|
}
|
||||||
|
CPPFImdlp test = CPPFImdlp(false, 6, true);
|
||||||
|
test.fit(dataset[3], y);
|
||||||
|
cout << "Cut points: " << test.getCutPoints().size() << endl;
|
||||||
|
for (auto item : test.getCutPoints()) {
|
||||||
|
cout << item << " ";
|
||||||
|
}
|
||||||
fin.close();
|
fin.close();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
Reference in New Issue
Block a user