Refactor computeCutPoints

This commit is contained in:
2022-11-30 18:30:08 +01:00
parent 2ca58d139a
commit a982dbd5ca
9 changed files with 67 additions and 27 deletions

View File

@@ -5,7 +5,7 @@
#include <stdio.h> #include <stdio.h>
#include <algorithm> #include <algorithm>
#include "Metrics.h" #include "Metrics.h"
namespace CPPFImdlp namespace mdlp
{ {
std::ostream &operator<<(std::ostream &os, const CutPointBody &cut) std::ostream &operator<<(std::ostream &os, const CutPointBody &cut)
{ {
@@ -23,6 +23,10 @@ namespace CPPFImdlp
CPPFImdlp::~CPPFImdlp() CPPFImdlp::~CPPFImdlp()
{ {
} }
std::vector<CutPointBody> CPPFImdlp::getCutPoints()
{
return cutPoints;
}
void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y) void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y)
{ {
std::cout << "+++++++++++++++++++++++" << std::endl; std::cout << "+++++++++++++++++++++++" << std::endl;
@@ -33,12 +37,13 @@ namespace CPPFImdlp
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]); printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
} }
std::cout << "+++++++++++++++++++++++" << std::endl; std::cout << "+++++++++++++++++++++++" << std::endl;
for (auto item : cutPoints(X, y)) computeCutPoints(X, y);
for (auto item : cutPoints)
{ {
std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl; std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl;
} }
} }
std::vector<CutPointBody> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y) void CPPFImdlp::computeCutPoints(std::vector<float> &X_, std::vector<int> &y_)
{ {
std::vector<CutPointBody> cutPts; std::vector<CutPointBody> cutPts;
@@ -47,7 +52,9 @@ namespace CPPFImdlp
float xPrev, xCur, xPivot; float xPrev, xCur, xPivot;
int yPrev, yCur, yPivot; int yPrev, yCur, yPivot;
size_t idxPrev, idxPivot, idx, numElements, start; size_t idxPrev, idxPivot, idx, numElements, start;
std::vector<size_t> indices = sortIndices(X); X = X_;
y = y_;
indices = sortIndices(X);
xCur = xPrev = X[indices[0]]; xCur = xPrev = X[indices[0]];
yCur = yPrev = y[indices[0]]; yCur = yPrev = y[indices[0]];
numElements = indices.size() - 1; numElements = indices.size() - 1;
@@ -79,10 +86,11 @@ namespace CPPFImdlp
if (yPivot == -1 || yPrev != yCur) if (yPivot == -1 || yPrev != yCur)
{ {
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = idxPrev; cutPoint.end = idx - 1;
start = idx; start = idx;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue; cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = (xPrev + xCur) / 2; cutPoint.toValue = (xPrev + xCur) / 2;
cutPoint.classNumber = -1;
firstCutPoint = false; firstCutPoint = false;
if (debug) if (debug)
{ {
@@ -95,20 +103,21 @@ namespace CPPFImdlp
xPrev = xPivot; xPrev = xPivot;
idxPrev = indices[idxPivot]; idxPrev = indices[idxPivot];
} }
if (idxPrev >= numElements) if (idx == numElements)
{ {
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = numElements; cutPoint.end = numElements;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue; cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = std::numeric_limits<float>::max(); cutPoint.toValue = std::numeric_limits<float>::max();
cutPoint.classNumber = -1;
if (debug) if (debug)
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
cutIdx.push_back(idxPrev); cutIdx.push_back(idxPrev);
} }
return cutPts; cutPoints = cutPts;
} }
std::vector<float> CPPFImdlp::cutPointsAnt(std::vector<float> &X, std::vector<int> &y) std::vector<float> CPPFImdlp::computeCutPointsAnt(std::vector<float> &X, std::vector<int> &y)
{ {
std::vector<float> cutPts; std::vector<float> cutPts;
std::vector<int> cutIdx; std::vector<int> cutIdx;

View File

@@ -2,12 +2,13 @@
#define CPPFIMDLP_H #define CPPFIMDLP_H
#include <vector> #include <vector>
#include <utility> #include <utility>
namespace CPPFImdlp namespace mdlp
{ {
struct CutPointBody struct CutPointBody
{ {
size_t start, end; size_t start, end; // indices of the sorted vector
float fromValue, toValue; int classNumber; // class assigned to the cut point
float fromValue, toValue; // Values of the variable
}; };
class CPPFImdlp class CPPFImdlp
{ {
@@ -15,15 +16,20 @@ namespace CPPFImdlp
bool debug; bool debug;
int precision; int precision;
float divider; float divider;
std::vector<size_t> std::vector<size_t> indices; // sorted indices to use with X and y
sortIndices(std::vector<float> &); std::vector<float> X;
std::vector<int> y;
std::vector<float> xDiscretized;
std::vector<CutPointBody> cutPoints;
std::vector<size_t> sortIndices(std::vector<float> &);
public: public:
CPPFImdlp(); CPPFImdlp();
CPPFImdlp(int, bool debug = false); CPPFImdlp(int, bool debug = false);
~CPPFImdlp(); ~CPPFImdlp();
std::vector<CutPointBody> cutPoints(std::vector<float> &, std::vector<int> &); std::vector<CutPointBody> getCutPoints();
std::vector<float> cutPointsAnt(std::vector<float> &, std::vector<int> &); void computeCutPoints(std::vector<float> &, std::vector<int> &);
std::vector<float> computeCutPointsAnt(std::vector<float> &, std::vector<int> &);
void debugPoints(std::vector<float> &, std::vector<int> &); void debugPoints(std::vector<float> &, std::vector<int> &);
}; };
} }

View File

@@ -1,5 +1,5 @@
#include "Metrics.h" #include "Metrics.h"
namespace CPPFImdlp namespace mdlp
{ {
Metrics::Metrics() Metrics::Metrics()
{ {

View File

@@ -2,7 +2,7 @@
#define METRICS_H #define METRICS_H
#include <vector> #include <vector>
#include <cmath> #include <cmath>
namespace CPPFImdlp namespace mdlp
{ {
class Metrics class Metrics
{ {

View File

@@ -3,13 +3,26 @@
from libcpp.vector cimport vector from libcpp.vector cimport vector
from libcpp cimport bool from libcpp cimport bool
cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp": cdef extern from "CPPFImdlp.h" namespace "mdlp":
# Cython-side declaration of the CutPointBody struct from CPPFImdlp.h;
# the field list matches the C++ definition.
cdef struct CutPointBody:
    size_t start, end;  # indices of the sorted vector
    int classNumber;  # class assigned to the cut point
    float fromValue, toValue;  # values of the variable
cdef cppclass CPPFImdlp: cdef cppclass CPPFImdlp:
CPPFImdlp() except + CPPFImdlp() except +
CPPFImdlp(int, bool) except + CPPFImdlp(int, bool) except +
vector[CutPointBody] getCutPoints()
vector[float] cutPointsAnt(vector[float]&, vector[int]&) vector[float] cutPointsAnt(vector[float]&, vector[int]&)
void debugPoints(vector[float]&, vector[int]&) void debugPoints(vector[float]&, vector[int]&)
void computeCutPoints(vector[float]&, vector[int]&)
class PCutPointBody:
    """Plain-Python record mirroring the fields of the C++ ``CutPointBody`` struct.

    Parameters
    ----------
    start, end
        Indices of the sorted vector delimiting the cut point.
    fromValue, toValue
        Values of the variable bounding the cut point.
    classNumber
        Class assigned to the cut point. Defaults to ``-1``, the value
        ``computeCutPoints`` stores on every cut point it builds.
    """

    def __init__(self, start, end, fromValue, toValue, classNumber=-1):
        self.start = start
        self.end = end
        self.fromValue = fromValue
        self.toValue = toValue
        # Kept last and optional so existing four-argument callers still work.
        self.classNumber = classNumber
cdef class CFImdlp: cdef class CFImdlp:
cdef CPPFImdlp *thisptr cdef CPPFImdlp *thisptr
@@ -17,7 +30,11 @@ cdef class CFImdlp:
self.thisptr = new CPPFImdlp(precision, debug) self.thisptr = new CPPFImdlp(precision, debug)
def __dealloc__(self): def __dealloc__(self):
del self.thisptr del self.thisptr
def cut_points(self, X, y):
    """Compute the cut points for X and y and return them.

    Calls computeCutPoints(X, y) on the wrapped C++ object, then returns
    the result of getCutPoints(), declared as vector[CutPointBody].
    NOTE(review): the sample script indexes each returned item by field
    name (e.g. item['classNumber']), so items presumably arrive as
    dict-like objects -- confirm against Cython's struct conversion.
    """
    self.thisptr.computeCutPoints(X, y)
    return self.thisptr.getCutPoints()
def cut_points_ant(self, X, y): def cut_points_ant(self, X, y):
return self.thisptr.cutPointsAnt(X, y) return self.get_cut_points(X, y)
def debug_points(self, X, y): def debug_points(self, X, y):
return self.thisptr.debugPoints(X, y) return self.thisptr.debugPoints(X, y)

Binary file not shown.

View File

@@ -1,17 +1,17 @@
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include <iostream> #include <iostream>
using namespace std; using namespace mdlp;
int main(int argc, char *argv[], char *envp[]) int main(int argc, char *argv[], char *envp[])
{ {
{ {
CPPFImdlp::CPPFImdlp fimdlp = CPPFImdlp::CPPFImdlp(true); CPPFImdlp fimdlp = CPPFImdlp(true);
vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; std::vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
vector<float> cutPts = fimdlp.cutPoints(X, y); fimdlp.computeCutPoints(X, y);
for (auto &cutPt : cutPts) for (struct CutPointBody cutPt : fimdlp.getCutPoints())
{ {
cout << cutPt << endl; std::cout << cutPt << std::endl;
} }
return 0; return 0;
} }

View File

@@ -3,6 +3,7 @@ from fimdlp.mdlp import FImdlp
from fimdlp.cppfimdlp import CFImdlp from fimdlp.cppfimdlp import CFImdlp
import numpy as np import numpy as np
data = load_iris() data = load_iris()
X = data.data X = data.data
y = data.target y = data.target
@@ -16,7 +17,14 @@ test = CFImdlp(debug=False)
# print(k) # print(k)
# k = test.cut_points_ant(X[:, 0], y) # k = test.cut_points_ant(X[:, 0], y)
# print(k) # print(k)
test.debug_points(X[:, 0], y) # test.debug_points(X[:, 0], y)
result = test.cut_points(X[:, 0], y)
for item in result:
print(
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
)
# X = np.array( # X = np.array(
# [ # [