mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
77b571af71
|
|||
ff7a91a7ec
|
|||
621c19d00c
|
|||
|
790da5cc60 | ||
2775698063
|
|||
9db16d9d3c
|
|||
edd464311f
|
|||
fe32ed4b2a
|
|||
1d95311a7d
|
10
.gitignore
vendored
10
.gitignore
vendored
@@ -33,8 +33,8 @@ MANIFEST
|
|||||||
*.manifest
|
*.manifest
|
||||||
*.spec
|
*.spec
|
||||||
|
|
||||||
# Installer log2s
|
# Installer logs
|
||||||
pip-log2.txt
|
pip-log.txt
|
||||||
pip-delete-this-directory.txt
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
# Unit test / coverage reports
|
# Unit test / coverage reports
|
||||||
@@ -56,7 +56,7 @@ coverage.xml
|
|||||||
*.pot
|
*.pot
|
||||||
|
|
||||||
# Django stuff:
|
# Django stuff:
|
||||||
*.log2
|
*.log
|
||||||
local_settings.py
|
local_settings.py
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
db.sqlite3-journal
|
db.sqlite3-journal
|
||||||
@@ -134,4 +134,6 @@ cmake-build-debug
|
|||||||
cmake-build-debug/**
|
cmake-build-debug/**
|
||||||
**/lcoverage/**
|
**/lcoverage/**
|
||||||
**/x/*
|
**/x/*
|
||||||
**/*.so
|
**/*.so
|
||||||
|
**/CMakeFiles
|
||||||
|
wheelhouse
|
||||||
|
3
MANIFEST.in
Normal file
3
MANIFEST.in
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
include src/cppmdlp/CPPFImdlp.h
|
||||||
|
include src/cppmdlp/typesFImdlp.h
|
||||||
|
include src/cppmdlp/Metrics.h
|
10
README.md
10
README.md
@@ -3,14 +3,14 @@
|
|||||||
[](https://github.com/Doctorado-ML/FImdlp/actions/workflows/codeql.yml)
|
[](https://github.com/Doctorado-ML/FImdlp/actions/workflows/codeql.yml)
|
||||||
[](https://www.codacy.com/gh/Doctorado-ML/FImdlp/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/FImdlp&utm_campaign=Badge_Grade)
|
[](https://www.codacy.com/gh/Doctorado-ML/FImdlp/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/FImdlp&utm_campaign=Badge_Grade)
|
||||||
[](https://codecov.io/gh/Doctorado-ML/FImdlp)
|
[](https://codecov.io/gh/Doctorado-ML/FImdlp)
|
||||||
[](https://img.shields.io/pypi/v/FImdlp?color=g)
|
[](https://pypi.org/project/FImdlp)
|
||||||

|

|
||||||
|
|
||||||
Discretization algorithm based on the paper by Usama M. Fayyad and Keki B. Irani
|
Discretization algorithm based on the paper by Usama M. Fayyad and Keki B. Irani
|
||||||
|
|
||||||
```
|
|
||||||
Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning. In Proceedings of the 13th International Joint Conference on Artificial Intelligence (IJCAI-95), pages 1022-1027, Montreal, Canada, August 1995.
|
Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning. In Proceedings of the 13th International Joint Conference on Artificial Intelligence (IJCAI-95), pages 1022-1027, Montreal, Canada, August 1995.
|
||||||
```
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@@ -24,8 +24,8 @@ git clone --recurse-submodules https://github.com/doctorado-ml/FImdlp.git
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install -e .
|
pip install -e .
|
||||||
python samples/sample.py iris --original
|
python samples/sample.py iris
|
||||||
python samples/sample.py iris --proposal
|
python samples/sample.py iris --alternative
|
||||||
python samples/sample.py -h # for more options
|
python samples/sample.py -h # for more options
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@@ -1,117 +0,0 @@
|
|||||||
#include "ArffFiles.h"
|
|
||||||
|
|
||||||
#include <fstream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
ArffFiles::ArffFiles()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
vector<string> ArffFiles::getLines()
|
|
||||||
{
|
|
||||||
return lines;
|
|
||||||
}
|
|
||||||
unsigned long int ArffFiles::getSize()
|
|
||||||
{
|
|
||||||
return lines.size();
|
|
||||||
}
|
|
||||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
|
||||||
{
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
string ArffFiles::getClassName()
|
|
||||||
{
|
|
||||||
return className;
|
|
||||||
}
|
|
||||||
string ArffFiles::getClassType()
|
|
||||||
{
|
|
||||||
return classType;
|
|
||||||
}
|
|
||||||
vector<vector<float>>& ArffFiles::getX()
|
|
||||||
{
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
vector<int>& ArffFiles::getY()
|
|
||||||
{
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
void ArffFiles::load(string fileName, bool classLast)
|
|
||||||
{
|
|
||||||
ifstream file(fileName);
|
|
||||||
string keyword, attribute, type;
|
|
||||||
if (file.is_open()) {
|
|
||||||
string line;
|
|
||||||
while (getline(file, line)) {
|
|
||||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
|
||||||
stringstream ss(line);
|
|
||||||
ss >> keyword >> attribute >> type;
|
|
||||||
attributes.push_back(make_tuple(attribute, type));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line[0] == '@') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
lines.push_back(line);
|
|
||||||
}
|
|
||||||
file.close();
|
|
||||||
if (attributes.empty())
|
|
||||||
throw invalid_argument("No attributes found");
|
|
||||||
if (classLast) {
|
|
||||||
className = get<0>(attributes.back());
|
|
||||||
classType = get<1>(attributes.back());
|
|
||||||
attributes.pop_back();
|
|
||||||
} else {
|
|
||||||
className = get<0>(attributes.front());
|
|
||||||
classType = get<1>(attributes.front());
|
|
||||||
attributes.erase(attributes.begin());
|
|
||||||
}
|
|
||||||
generateDataset(classLast);
|
|
||||||
} else
|
|
||||||
throw invalid_argument("Unable to open file");
|
|
||||||
}
|
|
||||||
void ArffFiles::generateDataset(bool classLast)
|
|
||||||
{
|
|
||||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
|
||||||
vector<string> yy = vector<string>(lines.size(), "");
|
|
||||||
int labelIndex = classLast ? attributes.size() : 0;
|
|
||||||
for (int i = 0; i < lines.size(); i++) {
|
|
||||||
stringstream ss(lines[i]);
|
|
||||||
string value;
|
|
||||||
int pos = 0, xIndex = 0;
|
|
||||||
while (getline(ss, value, ',')) {
|
|
||||||
if (pos++ == labelIndex) {
|
|
||||||
yy[i] = value;
|
|
||||||
} else {
|
|
||||||
X[xIndex++][i] = stof(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
y = factorize(yy);
|
|
||||||
}
|
|
||||||
string ArffFiles::trim(const string& source)
|
|
||||||
{
|
|
||||||
string s(source);
|
|
||||||
s.erase(0, s.find_first_not_of(" \n\r\t"));
|
|
||||||
s.erase(s.find_last_not_of(" \n\r\t") + 1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
|
|
||||||
{
|
|
||||||
vector<int> yy;
|
|
||||||
yy.reserve(labels_t.size());
|
|
||||||
map<string, int> labelMap;
|
|
||||||
int i = 0;
|
|
||||||
for (string label : labels_t) {
|
|
||||||
if (labelMap.find(label) == labelMap.end()) {
|
|
||||||
labelMap[label] = i++;
|
|
||||||
}
|
|
||||||
yy.push_back(labelMap[label]);
|
|
||||||
}
|
|
||||||
return yy;
|
|
||||||
}
|
|
@@ -1,28 +0,0 @@
|
|||||||
#ifndef ARFFFILES_H
|
|
||||||
#define ARFFFILES_H
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <tuple>
|
|
||||||
using namespace std;
|
|
||||||
class ArffFiles {
|
|
||||||
private:
|
|
||||||
vector<string> lines;
|
|
||||||
vector<tuple<string, string>> attributes;
|
|
||||||
string className, classType;
|
|
||||||
vector<vector<float>> X;
|
|
||||||
vector<int> y;
|
|
||||||
void generateDataset(bool);
|
|
||||||
public:
|
|
||||||
ArffFiles();
|
|
||||||
void load(string, bool = true);
|
|
||||||
vector<string> getLines();
|
|
||||||
unsigned long int getSize();
|
|
||||||
string getClassName();
|
|
||||||
string getClassType();
|
|
||||||
string trim(const string&);
|
|
||||||
vector<vector<float>>& getX();
|
|
||||||
vector<int>& getY();
|
|
||||||
vector<tuple<string, string>> getAttributes();
|
|
||||||
vector<int> factorize(const vector<string>& labels_t);
|
|
||||||
};
|
|
||||||
#endif
|
|
@@ -3,4 +3,4 @@ project(main)
|
|||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
|
|
||||||
add_executable(sample sample.cpp ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp)
|
add_executable(sample sample.cpp ../src/cppmdlp/tests/ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp)
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
#include "ArffFiles.h"
|
#include "../src/cppmdlp/tests/ArffFiles.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
@@ -41,7 +41,7 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
cout << y[i] << endl;
|
cout << y[i] << endl;
|
||||||
}
|
}
|
||||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(false);
|
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||||
for (auto i = 0; i < attributes.size(); i++) {
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||||
cout << "--------------------------" << setprecision(3) << endl;
|
cout << "--------------------------" << setprecision(3) << endl;
|
||||||
|
@@ -14,8 +14,9 @@ datasets = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ap = argparse.ArgumentParser()
|
ap = argparse.ArgumentParser()
|
||||||
ap.add_argument("--proposal", action="store_true")
|
ap.add_argument(
|
||||||
ap.add_argument("--original", dest="proposal", action="store_false")
|
"--alternative", dest="proposal", action="store_const", const=1
|
||||||
|
)
|
||||||
ap.add_argument("dataset", type=str, choices=datasets.keys())
|
ap.add_argument("dataset", type=str, choices=datasets.keys())
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
relative = "" if os.path.isdir("src") else ".."
|
relative = "" if os.path.isdir("src") else ".."
|
||||||
@@ -29,7 +30,7 @@ class_name = df.columns.to_list()[class_column]
|
|||||||
X = df.drop(class_name, axis=1)
|
X = df.drop(class_name, axis=1)
|
||||||
y, _ = pd.factorize(df[class_name])
|
y, _ = pd.factorize(df[class_name])
|
||||||
X = X.to_numpy()
|
X = X.to_numpy()
|
||||||
test = FImdlp(proposal=args.proposal)
|
test = FImdlp(algorithm=args.proposal if args.proposal is not None else 0)
|
||||||
now = time.time()
|
now = time.time()
|
||||||
test.fit(X, y)
|
test.fit(X, y)
|
||||||
fit_time = time.time()
|
fit_time = time.time()
|
||||||
|
Submodule src/cppmdlp updated: e21482900b...7b20bde428
@@ -1 +1 @@
|
|||||||
__version__ = "0.9.1"
|
__version__ = "0.9.2"
|
||||||
|
@@ -1,20 +1,20 @@
|
|||||||
# distutils: language = c++
|
# distutils: language = c++
|
||||||
# cython: language_level = 3
|
# cython: language_level = 3
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
from libcpp cimport bool
|
from libcpp.string cimport string
|
||||||
|
|
||||||
cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
|
cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
|
||||||
ctypedef float precision_t
|
ctypedef float precision_t
|
||||||
cdef cppclass CPPFImdlp:
|
cdef cppclass CPPFImdlp:
|
||||||
CPPFImdlp(bool) except +
|
CPPFImdlp(int) except +
|
||||||
CPPFImdlp& fit(vector[precision_t]&, vector[int]&)
|
CPPFImdlp& fit(vector[precision_t]&, vector[int]&)
|
||||||
vector[precision_t] getCutPoints()
|
vector[precision_t] getCutPoints()
|
||||||
|
string version()
|
||||||
|
|
||||||
|
|
||||||
cdef class CFImdlp:
|
cdef class CFImdlp:
|
||||||
cdef CPPFImdlp *thisptr
|
cdef CPPFImdlp *thisptr
|
||||||
def __cinit__(self, proposal):
|
def __cinit__(self, algorithm):
|
||||||
self.thisptr = new CPPFImdlp(proposal)
|
self.thisptr = new CPPFImdlp(algorithm)
|
||||||
def __dealloc__(self):
|
def __dealloc__(self):
|
||||||
del self.thisptr
|
del self.thisptr
|
||||||
def fit(self, X, y):
|
def fit(self, X, y):
|
||||||
@@ -22,4 +22,5 @@ cdef class CFImdlp:
|
|||||||
return self
|
return self
|
||||||
def get_cut_points(self):
|
def get_cut_points(self):
|
||||||
return self.thisptr.getCutPoints()
|
return self.thisptr.getCutPoints()
|
||||||
|
def get_version(self):
|
||||||
|
return self.thisptr.version()
|
||||||
|
@@ -7,14 +7,18 @@ from joblib import Parallel, delayed
|
|||||||
|
|
||||||
|
|
||||||
class FImdlp(TransformerMixin, BaseEstimator):
|
class FImdlp(TransformerMixin, BaseEstimator):
|
||||||
def __init__(self, n_jobs=-1, proposal=False):
|
def __init__(self, algorithm=0, n_jobs=-1):
|
||||||
|
self.algorithm = algorithm
|
||||||
self.n_jobs = n_jobs
|
self.n_jobs = n_jobs
|
||||||
self.proposal = proposal
|
|
||||||
|
|
||||||
"""Fayyad - Irani MDLP discretization algorithm based implementation.
|
"""Fayyad - Irani MDLP discretization algorithm based implementation.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
algorithm : int, default=0
|
||||||
|
The type of algorithm to use computing the cut points.
|
||||||
|
0 - Definitive implementation
|
||||||
|
1 - Alternative proposal
|
||||||
n_jobs : int, default=-1
|
n_jobs : int, default=-1
|
||||||
The number of jobs to run in parallel. :meth:`fit` and
|
The number of jobs to run in parallel. :meth:`fit` and
|
||||||
:meth:`transform`, are parallelized over the features. ``-1`` means
|
:meth:`transform`, are parallelized over the features. ``-1`` means
|
||||||
@@ -94,9 +98,15 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def _fit_discretizer(self, feature):
|
def _fit_discretizer(self, feature):
|
||||||
self.discretizer_[feature] = CFImdlp(proposal=self.proposal)
|
if feature in self.features_:
|
||||||
self.discretizer_[feature].fit(self.X_[:, feature], self.y_)
|
self.discretizer_[feature] = CFImdlp(algorithm=self.algorithm)
|
||||||
self.cut_points_[feature] = self.discretizer_[feature].get_cut_points()
|
self.discretizer_[feature].fit(self.X_[:, feature], self.y_)
|
||||||
|
self.cut_points_[feature] = self.discretizer_[
|
||||||
|
feature
|
||||||
|
].get_cut_points()
|
||||||
|
else:
|
||||||
|
self.discretizer_[feature] = None
|
||||||
|
self.cut_points_[feature] = []
|
||||||
|
|
||||||
def _discretize_feature(self, feature, X, result):
|
def _discretize_feature(self, feature, X, result):
|
||||||
if feature in self.features_:
|
if feature in self.features_:
|
||||||
@@ -125,7 +135,10 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Shape of input is different from what was seen in `fit`"
|
"Shape of input is different from what was seen in `fit`"
|
||||||
)
|
)
|
||||||
result = np.zeros_like(X, dtype=np.int32) - 1
|
if len(self.features_) == self.n_features_:
|
||||||
|
result = np.zeros_like(X, dtype=np.int32) - 1
|
||||||
|
else:
|
||||||
|
result = np.zeros_like(X) - 1
|
||||||
Parallel(n_jobs=self.n_jobs, prefer="threads")(
|
Parallel(n_jobs=self.n_jobs, prefer="threads")(
|
||||||
delayed(self._discretize_feature)(feature, X[:, feature], result)
|
delayed(self._discretize_feature)(feature, X[:, feature], result)
|
||||||
for feature in range(self.n_features_)
|
for feature in range(self.n_features_)
|
||||||
|
@@ -14,47 +14,41 @@ class FImdlpTest(unittest.TestCase):
|
|||||||
def test_init(self):
|
def test_init(self):
|
||||||
clf = FImdlp()
|
clf = FImdlp()
|
||||||
self.assertEqual(-1, clf.n_jobs)
|
self.assertEqual(-1, clf.n_jobs)
|
||||||
self.assertFalse(clf.proposal)
|
self.assertEqual(0, clf.algorithm)
|
||||||
clf = FImdlp(proposal=True, n_jobs=7)
|
clf = FImdlp(algorithm=1, n_jobs=7)
|
||||||
self.assertTrue(clf.proposal)
|
self.assertEqual(1, clf.algorithm)
|
||||||
self.assertEqual(7, clf.n_jobs)
|
self.assertEqual(7, clf.n_jobs)
|
||||||
|
|
||||||
def test_fit_proposal(self):
|
def test_fit_definitive(self):
|
||||||
clf = FImdlp(proposal=True)
|
clf = FImdlp(algorithm=0)
|
||||||
clf.fit([[1, 2], [3, 4]], [1, 2])
|
clf.fit([[1, 2], [3, 4]], [1, 2])
|
||||||
self.assertEqual(clf.n_features_, 2)
|
self.assertEqual(clf.n_features_, 2)
|
||||||
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
|
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
|
||||||
self.assertListEqual(clf.y_.tolist(), [1, 2])
|
self.assertListEqual(clf.y_.tolist(), [1, 2])
|
||||||
self.assertListEqual([[], []], clf.get_cut_points())
|
self.assertListEqual([[2.0], [3.0]], clf.get_cut_points())
|
||||||
X, y = load_iris(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
self.assertEqual(clf.n_features_, 4)
|
self.assertEqual(clf.n_features_, 4)
|
||||||
self.assertTrue(np.array_equal(X, clf.X_))
|
self.assertTrue(np.array_equal(X, clf.X_))
|
||||||
self.assertTrue(np.array_equal(y, clf.y_))
|
self.assertTrue(np.array_equal(y, clf.y_))
|
||||||
expected = [
|
expected = [
|
||||||
[
|
[5.449999809265137, 6.25],
|
||||||
4.900000095367432,
|
[2.8499999046325684, 3.0, 3.049999952316284, 3.3499999046325684],
|
||||||
5.0,
|
[2.450000047683716, 4.75, 5.050000190734863],
|
||||||
5.099999904632568,
|
[0.800000011920929, 1.4500000476837158, 1.75],
|
||||||
5.400000095367432,
|
|
||||||
5.699999809265137,
|
|
||||||
],
|
|
||||||
[2.6999998092651367, 2.9000000953674316, 3.1999998092651367],
|
|
||||||
[2.3499999046325684, 4.5, 4.800000190734863],
|
|
||||||
[0.75, 1.399999976158142, 1.5, 1.7000000476837158],
|
|
||||||
]
|
]
|
||||||
self.assertListEqual(expected, clf.get_cut_points())
|
self.assertListEqual(expected, clf.get_cut_points())
|
||||||
self.assertListEqual([0, 1, 2, 3], clf.features_)
|
self.assertListEqual([0, 1, 2, 3], clf.features_)
|
||||||
clf.fit(X, y, features=[0, 2, 3])
|
clf.fit(X, y, features=[0, 2, 3])
|
||||||
self.assertListEqual([0, 2, 3], clf.features_)
|
self.assertListEqual([0, 2, 3], clf.features_)
|
||||||
|
|
||||||
def test_fit_original(self):
|
def test_fit_alternative(self):
|
||||||
clf = FImdlp(proposal=False)
|
clf = FImdlp(algorithm=1)
|
||||||
clf.fit([[1, 2], [3, 4]], [1, 2])
|
clf.fit([[1, 2], [3, 4]], [1, 2])
|
||||||
self.assertEqual(clf.n_features_, 2)
|
self.assertEqual(clf.n_features_, 2)
|
||||||
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
|
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
|
||||||
self.assertListEqual(clf.y_.tolist(), [1, 2])
|
self.assertListEqual(clf.y_.tolist(), [1, 2])
|
||||||
self.assertListEqual([[], []], clf.get_cut_points())
|
self.assertListEqual([[2], [3]], clf.get_cut_points())
|
||||||
X, y = load_iris(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
self.assertEqual(clf.n_features_, 4)
|
self.assertEqual(clf.n_features_, 4)
|
||||||
@@ -62,10 +56,10 @@ class FImdlpTest(unittest.TestCase):
|
|||||||
self.assertTrue(np.array_equal(y, clf.y_))
|
self.assertTrue(np.array_equal(y, clf.y_))
|
||||||
|
|
||||||
expected = [
|
expected = [
|
||||||
[5.5, 5.800000190734863],
|
[5.449999809265137, 5.75],
|
||||||
[2.9000000953674316, 3.3499999046325684],
|
[2.8499999046325684, 3.3499999046325684],
|
||||||
[2.450000047683716, 4.800000190734863],
|
[2.450000047683716, 4.75],
|
||||||
[0.800000011920929, 1.7999999523162842],
|
[0.800000011920929, 1.75],
|
||||||
]
|
]
|
||||||
self.assertListEqual(expected, clf.get_cut_points())
|
self.assertListEqual(expected, clf.get_cut_points())
|
||||||
self.assertListEqual([0, 1, 2, 3], clf.features_)
|
self.assertListEqual([0, 1, 2, 3], clf.features_)
|
||||||
@@ -89,45 +83,58 @@ class FImdlpTest(unittest.TestCase):
|
|||||||
|
|
||||||
def test_fit_features(self):
|
def test_fit_features(self):
|
||||||
clf = FImdlp()
|
clf = FImdlp()
|
||||||
clf.fit([[1, 2], [3, 4]], [1, 2], features=[0])
|
clf.fit([[1, -2], [3, 4]], [1, 2], features=[0])
|
||||||
res = clf.transform([[1, 2], [3, 4]])
|
res = clf.transform([[1, -2], [3, 4]])
|
||||||
self.assertListEqual(res.tolist(), [[0, 2], [0, 4]])
|
self.assertListEqual(res.tolist(), [[0, -2], [1, 4]])
|
||||||
|
X, y = load_iris(return_X_y=True)
|
||||||
|
X_expected = X[:, [0, 2]].copy()
|
||||||
|
clf.fit(X, y, features=[1, 3])
|
||||||
|
X_computed = clf.transform(X)
|
||||||
|
self.assertListEqual(
|
||||||
|
X_expected[:, 0].tolist(), X_computed[:, 0].tolist()
|
||||||
|
)
|
||||||
|
self.assertListEqual(
|
||||||
|
X_expected[:, 1].tolist(), X_computed[:, 2].tolist()
|
||||||
|
)
|
||||||
|
self.assertEqual(X_computed.dtype, np.float64)
|
||||||
|
|
||||||
def test_transform_original(self):
|
def test_transform_definitive(self):
|
||||||
clf = FImdlp(proposal=False)
|
clf = FImdlp(algorithm=0)
|
||||||
clf.fit([[1, 2], [3, 4]], [1, 2])
|
clf.fit([[1, 2], [3, 4]], [1, 2])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [0, 0]]
|
clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [1, 1]]
|
||||||
)
|
)
|
||||||
X, y = load_iris(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
self.assertEqual(clf.n_features_, 4)
|
self.assertEqual(clf.n_features_, 4)
|
||||||
self.assertTrue(np.array_equal(X, clf.X_))
|
self.assertTrue(np.array_equal(X, clf.X_))
|
||||||
self.assertTrue(np.array_equal(y, clf.y_))
|
self.assertTrue(np.array_equal(y, clf.y_))
|
||||||
|
X_transformed = clf.transform(X)
|
||||||
self.assertListEqual(
|
self.assertListEqual(
|
||||||
clf.transform(X).tolist(), clf.fit(X, y).transform(X).tolist()
|
X_transformed.tolist(), clf.fit(X, y).transform(X).tolist()
|
||||||
)
|
)
|
||||||
|
self.assertEqual(X_transformed.dtype, np.int32)
|
||||||
expected = [
|
expected = [
|
||||||
[0, 0, 1, 1],
|
[1, 0, 1, 1],
|
||||||
[2, 1, 1, 1],
|
[1, 1, 1, 1],
|
||||||
[1, 0, 1, 1],
|
[1, 0, 1, 1],
|
||||||
[0, 0, 1, 1],
|
[0, 0, 1, 1],
|
||||||
[1, 0, 1, 1],
|
[1, 0, 1, 1],
|
||||||
[1, 1, 1, 1],
|
[1, 1, 1, 1],
|
||||||
[1, 0, 1, 1],
|
[1, 1, 1, 1],
|
||||||
]
|
]
|
||||||
self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected))
|
self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected))
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
clf.transform([[1, 2, 3], [4, 5, 6]])
|
clf.transform([[1, 2, 3], [4, 5, 6]])
|
||||||
with self.assertRaises(sklearn.exceptions.NotFittedError):
|
with self.assertRaises(sklearn.exceptions.NotFittedError):
|
||||||
clf = FImdlp(proposal=False)
|
clf = FImdlp(algorithm=0)
|
||||||
clf.transform([[1, 2], [3, 4]])
|
clf.transform([[1, 2], [3, 4]])
|
||||||
|
|
||||||
def test_transform_proposal(self):
|
def test_transform_alternative(self):
|
||||||
clf = FImdlp(proposal=True)
|
clf = FImdlp(algorithm=1)
|
||||||
clf.fit([[1, 2], [3, 4]], [1, 2])
|
clf.fit([[1, 2], [3, 4]], [1, 2])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [0, 0]]
|
clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [1, 1]]
|
||||||
)
|
)
|
||||||
X, y = load_iris(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
@@ -138,17 +145,17 @@ class FImdlpTest(unittest.TestCase):
|
|||||||
clf.transform(X).tolist(), clf.fit(X, y).transform(X).tolist()
|
clf.transform(X).tolist(), clf.fit(X, y).transform(X).tolist()
|
||||||
)
|
)
|
||||||
expected = [
|
expected = [
|
||||||
[4, 0, 1, 1],
|
|
||||||
[5, 2, 2, 2],
|
|
||||||
[5, 0, 1, 1],
|
|
||||||
[1, 0, 1, 1],
|
[1, 0, 1, 1],
|
||||||
[4, 1, 1, 1],
|
[2, 1, 1, 1],
|
||||||
[5, 2, 1, 1],
|
[2, 0, 1, 1],
|
||||||
[5, 1, 1, 1],
|
[0, 0, 1, 1],
|
||||||
|
[1, 0, 1, 1],
|
||||||
|
[1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1],
|
||||||
]
|
]
|
||||||
self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected))
|
self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected))
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
clf.transform([[1, 2, 3], [4, 5, 6]])
|
clf.transform([[1, 2, 3], [4, 5, 6]])
|
||||||
with self.assertRaises(sklearn.exceptions.NotFittedError):
|
with self.assertRaises(sklearn.exceptions.NotFittedError):
|
||||||
clf = FImdlp(proposal=True)
|
clf = FImdlp(algorithm=1)
|
||||||
clf.transform([[1, 2], [3, 4]])
|
clf.transform([[1, 2], [3, 4]])
|
||||||
|
Reference in New Issue
Block a user