Refactor computeCutPoints

This commit is contained in:
2022-11-30 18:30:08 +01:00
parent 2ca58d139a
commit a982dbd5ca
9 changed files with 67 additions and 27 deletions

View File

@@ -5,7 +5,7 @@
#include <stdio.h>
#include <algorithm>
#include "Metrics.h"
namespace CPPFImdlp
namespace mdlp
{
std::ostream &operator<<(std::ostream &os, const CutPointBody &cut)
{
@@ -23,6 +23,10 @@ namespace CPPFImdlp
// Destructor with an empty body: no explicit cleanup is performed here.
CPPFImdlp::~CPPFImdlp()
{
}
// Returns the cutPoints member by value, so the caller receives its own copy.
// The member is assigned as the last statement of computeCutPoints().
std::vector<CutPointBody> CPPFImdlp::getCutPoints()
{
return cutPoints;
}
void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y)
{
std::cout << "+++++++++++++++++++++++" << std::endl;
@@ -33,12 +37,13 @@ namespace CPPFImdlp
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
for (auto item : cutPoints(X, y))
computeCutPoints(X, y);
for (auto item : cutPoints)
{
std::cout << item << " X[" << item.end << "]=" << X[item.end] << std::endl;
}
}
std::vector<CutPointBody> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
void CPPFImdlp::computeCutPoints(std::vector<float> &X_, std::vector<int> &y_)
{
std::vector<CutPointBody> cutPts;
@@ -47,7 +52,9 @@ namespace CPPFImdlp
float xPrev, xCur, xPivot;
int yPrev, yCur, yPivot;
size_t idxPrev, idxPivot, idx, numElements, start;
std::vector<size_t> indices = sortIndices(X);
X = X_;
y = y_;
indices = sortIndices(X);
xCur = xPrev = X[indices[0]];
yCur = yPrev = y[indices[0]];
numElements = indices.size() - 1;
@@ -79,10 +86,11 @@ namespace CPPFImdlp
if (yPivot == -1 || yPrev != yCur)
{
cutPoint.start = start;
cutPoint.end = idxPrev;
cutPoint.end = idx - 1;
start = idx;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = (xPrev + xCur) / 2;
cutPoint.classNumber = -1;
firstCutPoint = false;
if (debug)
{
@@ -95,20 +103,21 @@ namespace CPPFImdlp
xPrev = xPivot;
idxPrev = indices[idxPivot];
}
if (idxPrev >= numElements)
if (idx == numElements)
{
cutPoint.start = start;
cutPoint.end = numElements;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = std::numeric_limits<float>::max();
cutPoint.classNumber = -1;
if (debug)
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
cutPts.push_back(cutPoint);
cutIdx.push_back(idxPrev);
}
return cutPts;
cutPoints = cutPts;
}
std::vector<float> CPPFImdlp::cutPointsAnt(std::vector<float> &X, std::vector<int> &y)
std::vector<float> CPPFImdlp::computeCutPointsAnt(std::vector<float> &X, std::vector<int> &y)
{
std::vector<float> cutPts;
std::vector<int> cutIdx;

View File

@@ -2,12 +2,13 @@
#define CPPFIMDLP_H
#include <vector>
#include <utility>
namespace CPPFImdlp
namespace mdlp
{
// One interval reported by CPPFImdlp::computeCutPoints().
// computeCutPoints() assigns every field and sets classNumber to -1; its debug
// output prints the value range as (fromValue, toValue].
struct CutPointBody
{
size_t start, end;
float fromValue, toValue;
size_t start, end; // indices of the sorted vector
int classNumber; // class assigned to the cut point
float fromValue, toValue; // Values of the variable
};
class CPPFImdlp
{
@@ -15,15 +16,20 @@ namespace CPPFImdlp
bool debug;
int precision;
float divider;
std::vector<size_t>
sortIndices(std::vector<float> &);
std::vector<size_t> indices; // sorted indices to use with X and y
std::vector<float> X;
std::vector<int> y;
std::vector<float> xDiscretized;
std::vector<CutPointBody> cutPoints;
std::vector<size_t> sortIndices(std::vector<float> &);
public:
CPPFImdlp();
CPPFImdlp(int, bool debug = false);
~CPPFImdlp();
std::vector<CutPointBody> cutPoints(std::vector<float> &, std::vector<int> &);
std::vector<float> cutPointsAnt(std::vector<float> &, std::vector<int> &);
std::vector<CutPointBody> getCutPoints();
void computeCutPoints(std::vector<float> &, std::vector<int> &);
std::vector<float> computeCutPointsAnt(std::vector<float> &, std::vector<int> &);
void debugPoints(std::vector<float> &, std::vector<int> &);
};
}

View File

@@ -1,5 +1,5 @@
#include "Metrics.h"
namespace CPPFImdlp
namespace mdlp
{
Metrics::Metrics()
{

View File

@@ -2,7 +2,7 @@
#define METRICS_H
#include <vector>
#include <cmath>
namespace CPPFImdlp
namespace mdlp
{
class Metrics
{

View File

@@ -3,13 +3,26 @@
from libcpp.vector cimport vector
from libcpp cimport bool
# C++ declarations from CPPFImdlp.h that the wrapper class in this file uses.
cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
cdef extern from "CPPFImdlp.h" namespace "mdlp":
# Same fields as struct CutPointBody in the header: start, end, classNumber,
# fromValue, toValue.
cdef struct CutPointBody:
size_t start, end;
int classNumber;
float fromValue, toValue;
cdef cppclass CPPFImdlp:
CPPFImdlp() except +
CPPFImdlp(int, bool) except +
vector[CutPointBody] getCutPoints()
# NOTE(review): the header shown in this change declares
# computeCutPointsAnt(...) next to a cutPointsAnt(...) line; if the C++ method
# was renamed, this declaration no longer matches it — confirm the name.
vector[float] cutPointsAnt(vector[float]&, vector[int]&)
void debugPoints(vector[float]&, vector[int]&)
void computeCutPoints(vector[float]&, vector[int]&)
# Plain Python class that stores start, end, fromValue and toValue as
# instance attributes.
# NOTE(review): there is no classNumber attribute, although the CutPointBody
# struct declared in the extern block has one, and nothing visible in this
# change instantiates this class — confirm whether it is still needed.
class PCutPointBody:
def __init__(self, start, end, fromValue, toValue):
self.start = start
self.end = end
self.fromValue = fromValue
self.toValue = toValue
cdef class CFImdlp:
cdef CPPFImdlp *thisptr
@@ -17,7 +30,11 @@ cdef class CFImdlp:
self.thisptr = new CPPFImdlp(precision, debug)
# Deletes the C++ CPPFImdlp object held in thisptr (allocated with `new`
# when the wrapper was set up).
def __dealloc__(self):
del self.thisptr
# Runs the C++ computeCutPoints(X, y) and then returns the result of
# getCutPoints(). The sample script in this change reads each returned item
# by key: classNumber, start, end, fromValue, toValue.
def cut_points(self, X, y):
self.thisptr.computeCutPoints(X, y)
return self.thisptr.getCutPoints()
# NOTE(review): two return statements appear here (presumably the removed and
# the added line of this change). The second one calls
# self.get_cut_points(X, y); the methods visible on this class are cut_points,
# cut_points_ant and debug_points, so confirm that get_cut_points exists and
# accepts (X, y).
def cut_points_ant(self, X, y):
return self.thisptr.cutPointsAnt(X, y)
return self.get_cut_points(X, y)
# Forwards X, y to the C++ debugPoints(), which is declared as returning void.
def debug_points(self, X, y):
return self.thisptr.debugPoints(X, y)

Binary file not shown.

View File

@@ -1,17 +1,17 @@
#include "CPPFImdlp.h"
#include <iostream>
using namespace std;
using namespace mdlp;
// Sample driver: builds a ten-element series whose labels are all 1, computes
// its cut points and prints each resulting CutPointBody.
int main(int argc, char *argv[], char *envp[])
{
{
CPPFImdlp::CPPFImdlp fimdlp = CPPFImdlp::CPPFImdlp(true);
vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
vector<float> cutPts = fimdlp.cutPoints(X, y);
for (auto &cutPt : cutPts)
// NOTE(review): the header declares CPPFImdlp() and
// CPPFImdlp(int, bool debug = false); a single `true` argument therefore binds
// to the int parameter and debug keeps its default of false — confirm that
// this is what the sample intends.
CPPFImdlp fimdlp = CPPFImdlp(true);
std::vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
fimdlp.computeCutPoints(X, y);
// getCutPoints() returns the vector by value; each element is copied into cutPt.
for (struct CutPointBody cutPt : fimdlp.getCutPoints())
{
cout << cutPt << endl;
std::cout << cutPt << std::endl;
}
return 0;
}

View File

@@ -3,6 +3,7 @@ from fimdlp.mdlp import FImdlp
from fimdlp.cppfimdlp import CFImdlp
import numpy as np
data = load_iris()
X = data.data
y = data.target
@@ -16,7 +17,14 @@ test = CFImdlp(debug=False)
# print(k)
# k = test.cut_points_ant(X[:, 0], y)
# print(k)
test.debug_points(X[:, 0], y)
# test.debug_points(X[:, 0], y)
result = test.cut_points(X[:, 0], y)
for item in result:
print(
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
)
# X = np.array(
# [