mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
Add max_cuts hyperparameter as in mdlp
This commit is contained in:
Submodule src/cppmdlp updated: a7d13f602d...ed7433672d
@@ -40,11 +40,10 @@ vector<int>& ArffFiles::getY()
|
||||
void ArffFiles::load(string fileName, bool classLast)
|
||||
{
|
||||
ifstream file(fileName);
|
||||
string keyword, attribute, type;
|
||||
if (file.is_open()) {
|
||||
string line;
|
||||
string line, keyword, attribute, type;
|
||||
while (getline(file, line)) {
|
||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
@@ -79,7 +78,7 @@ void ArffFiles::generateDataset(bool classLast)
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
|
@@ -12,7 +12,7 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
|
||||
ctypedef float precision_t
|
||||
cdef cppclass CPPFImdlp:
|
||||
CPPFImdlp() except +
|
||||
CPPFImdlp(size_t, int) except +
|
||||
CPPFImdlp(size_t, int, float) except +
|
||||
CPPFImdlp& fit(vector[precision_t]&, vector[int]&)
|
||||
int get_depth()
|
||||
vector[precision_t] getCutPoints()
|
||||
@@ -20,8 +20,8 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
|
||||
|
||||
cdef class CFImdlp:
|
||||
cdef CPPFImdlp *thisptr
|
||||
def __cinit__(self, size_t min_length=3, int max_depth=INT_MAX):
|
||||
self.thisptr = new CPPFImdlp(min_length, max_depth)
|
||||
def __cinit__(self, size_t min_length=3, int max_depth=INT_MAX, float max_cuts=0):
|
||||
self.thisptr = new CPPFImdlp(min_length, max_depth, max_cuts)
|
||||
def __dealloc__(self):
|
||||
del self.thisptr
|
||||
def fit(self, X, y):
|
||||
|
@@ -10,10 +10,11 @@ from ._version import __version__
|
||||
|
||||
|
||||
class FImdlp(TransformerMixin, BaseEstimator):
|
||||
def __init__(self, n_jobs=-1, min_length=3, max_depth=1e6):
|
||||
def __init__(self, n_jobs=-1, min_length=3, max_depth=1e6, max_cuts=0):
|
||||
self.n_jobs = n_jobs
|
||||
self.min_length = min_length
|
||||
self.max_depth = max_depth
|
||||
self.max_cuts = max_cuts
|
||||
|
||||
"""Fayyad - Irani MDLP discretization algorithm based implementation.
|
||||
|
||||
@@ -108,7 +109,9 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
def _fit_discretizer(self, feature):
|
||||
if feature in self.features_:
|
||||
self.discretizer_[feature] = CFImdlp(
|
||||
min_length=self.min_length, max_depth=self.max_depth
|
||||
min_length=self.min_length,
|
||||
max_depth=self.max_depth,
|
||||
max_cuts=self.max_cuts,
|
||||
)
|
||||
self.discretizer_[feature].fit(self.X_[:, feature], self.y_)
|
||||
self.cut_points_[feature] = self.discretizer_[
|
||||
|
@@ -7,8 +7,6 @@ from ..cppfimdlp import CFImdlp, factorize, CArffFiles
|
||||
from ..mdlp import FImdlp
|
||||
from .. import __version__
|
||||
|
||||
# from .._version import __version__
|
||||
|
||||
|
||||
class FImdlpTest(unittest.TestCase):
|
||||
delta = 1e-6 # same tolerance as in C++ code
|
||||
@@ -288,6 +286,24 @@ class FImdlpTest(unittest.TestCase):
|
||||
for e, c in zip(expected, computed):
|
||||
self.assertAlmostEqual(e, c, delta=self.delta)
|
||||
|
||||
def test_max_cuts(self):
|
||||
clf = FImdlp(max_cuts=1)
|
||||
X, y = load_iris(return_X_y=True)
|
||||
clf.fit(X, y)
|
||||
expected_cutpoints = [
|
||||
[5.45],
|
||||
[3.35],
|
||||
[2.45],
|
||||
[0.8],
|
||||
]
|
||||
expected_depths = [1] * 4
|
||||
self.assertListEqual(expected_depths, clf.get_depths())
|
||||
for expected, computed in zip(
|
||||
expected_cutpoints, clf.get_cut_points()
|
||||
):
|
||||
for e, c in zip(expected, computed):
|
||||
self.assertAlmostEqual(e, c, delta=self.delta)
|
||||
|
||||
def test_ArffFiles(self):
|
||||
loader = CArffFiles()
|
||||
loader.load(b"src/cppmdlp/tests/datasets/iris.arff")
|
||||
|
Reference in New Issue
Block a user