From 1d95311a7d5951066486384144fc647c392866ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Wed, 14 Dec 2022 12:23:07 +0100 Subject: [PATCH 1/5] fix: :bug: Fix a bug when pip install tries to build the package of File not Found #4 --- MANIFEST.in | 1 + README.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c6ee3a9 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include src/cppmdlp/CPPFImdlp.h \ No newline at end of file diff --git a/README.md b/README.md index c264684..f1033d2 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ Discretization algorithm based on the paper by Usama M. Fayyad and Keki B. Irani -``` + Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning. In Proceedings of the 13th International Joint Conference on Artificial Intelligence (IJCAI-95), pages 1022-1027, Montreal, Canada, August 1995. -``` + ## Installation From fe32ed4b2ae8c4d09b408fe02201df9d4c4d440e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Thu, 15 Dec 2022 12:12:44 +0100 Subject: [PATCH 2/5] Update algorithm type to compute cut points --- samples/sample.cpp | 2 +- src/cppmdlp | 2 +- src/fimdlp/_version.py | 2 +- src/fimdlp/cfimdlp.pyx | 2 +- src/fimdlp/mdlp.py | 7 ++++++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/samples/sample.cpp b/samples/sample.cpp index d838bb9..9f18db0 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -41,7 +41,7 @@ int main(int argc, char** argv) } cout << y[i] << endl; } - mdlp::CPPFImdlp test = mdlp::CPPFImdlp(false); + mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0); for (auto i = 0; i < attributes.size(); i++) { cout << "Cut points for " << get<0>(attributes[i]) << endl; cout << "--------------------------" << setprecision(3) << endl; diff --git a/src/cppmdlp b/src/cppmdlp index e214829..50543e4 160000 --- a/src/cppmdlp +++ b/src/cppmdlp @@ -1 +1 @@ -Subproject commit e21482900bdf307c50b0e0b0647458cf2bb32ad5 +Subproject commit 50543e492125754875becba45d1bd7d66ac88a7a diff --git a/src/fimdlp/_version.py b/src/fimdlp/_version.py index d69d16e..a2fecb4 100644 --- a/src/fimdlp/_version.py +++ b/src/fimdlp/_version.py @@ -1 +1 @@ -__version__ = "0.9.1" +__version__ = "0.9.2" diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx index c831389..83812ac 100644 --- a/src/fimdlp/cfimdlp.pyx +++ b/src/fimdlp/cfimdlp.pyx @@ -6,7 +6,7 @@ from libcpp cimport bool cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp": ctypedef float precision_t cdef cppclass CPPFImdlp: - CPPFImdlp(bool) except + + CPPFImdlp(int) except + CPPFImdlp& fit(vector[precision_t]&, vector[int]&) vector[precision_t] getCutPoints() diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py index b8f11e7..86441e7 100644 --- a/src/fimdlp/mdlp.py +++ b/src/fimdlp/mdlp.py @@ -7,7 +7,7 @@ from joblib import Parallel, delayed class FImdlp(TransformerMixin, BaseEstimator): - def __init__(self, n_jobs=-1, proposal=False): + def __init__(self, n_jobs=-1, proposal=0): self.n_jobs = n_jobs self.proposal = proposal @@ -19,6 +19,11 @@ class FImdlp(TransformerMixin, BaseEstimator): The number of jobs to run in parallel. :meth:`fit` and :meth:`transform`, are parallelized over the features. ``-1`` means using all cores available. + proposal : int, default=0 + The type of algorithm to use computing the cut points. + 0 - Normal implementation + 1 - JA Proposal + 2 - Original proposal Attributes ---------- From edd464311f834ab5194bbe3e8cb26eb623184efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Thu, 15 Dec 2022 12:18:10 +0100 Subject: [PATCH 3/5] fix: :bug: Fix Tests and sample mistake --- samples/sample.py | 7 +++++-- src/fimdlp/tests/FImdlp_test.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/samples/sample.py b/samples/sample.py index e7f5fca..7ab2a19 100644 --- a/samples/sample.py +++ b/samples/sample.py @@ -14,8 +14,11 @@ datasets = { } ap = argparse.ArgumentParser() -ap.add_argument("--proposal", action="store_true") -ap.add_argument("--original", dest="proposal", action="store_false") +ap.add_argument("--proposal", action="store_const", const=1) +ap.add_argument("--original", dest="proposal", action="store_const", const=0) +ap.add_argument( + "--alternative", dest="proposal", action="store_const", const=2 +) ap.add_argument("dataset", type=str, choices=datasets.keys()) args = ap.parse_args() relative = "" if os.path.isdir("src") else ".." diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py index c6d01f0..df8da03 100644 --- a/src/fimdlp/tests/FImdlp_test.py +++ b/src/fimdlp/tests/FImdlp_test.py @@ -14,13 +14,13 @@ class FImdlpTest(unittest.TestCase): def test_init(self): clf = FImdlp() self.assertEqual(-1, clf.n_jobs) - self.assertFalse(clf.proposal) - clf = FImdlp(proposal=True, n_jobs=7) - self.assertTrue(clf.proposal) + self.assertEqual(0, clf.proposal) + clf = FImdlp(proposal=1, n_jobs=7) + self.assertEqual(1, clf.proposal) self.assertEqual(7, clf.n_jobs) def test_fit_proposal(self): - clf = FImdlp(proposal=True) + clf = FImdlp(proposal=1) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual(clf.n_features_, 2) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) @@ -49,7 +49,7 @@ class FImdlpTest(unittest.TestCase): self.assertListEqual([0, 2, 3], clf.features_) def test_fit_original(self): - clf = FImdlp(proposal=False) + clf = FImdlp(proposal=0) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual(clf.n_features_, 2) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) @@ -94,7 +94,7 @@ class FImdlpTest(unittest.TestCase): self.assertListEqual(res.tolist(), [[0, 2], [0, 4]]) def test_transform_original(self): - clf = FImdlp(proposal=False) + clf = FImdlp(proposal=0) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual( clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [0, 0]] @@ -120,11 +120,11 @@ class FImdlpTest(unittest.TestCase): with self.assertRaises(ValueError): clf.transform([[1, 2, 3], [4, 5, 6]]) with self.assertRaises(sklearn.exceptions.NotFittedError): - clf = FImdlp(proposal=False) + clf = FImdlp(proposal=0) clf.transform([[1, 2], [3, 4]]) def test_transform_proposal(self): - clf = FImdlp(proposal=True) + clf = FImdlp(proposal=1) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual( clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [0, 0]] @@ -150,5 +150,5 @@ class FImdlpTest(unittest.TestCase): with self.assertRaises(ValueError): clf.transform([[1, 2, 3], [4, 5, 6]]) with self.assertRaises(sklearn.exceptions.NotFittedError): - clf = FImdlp(proposal=True) + clf = FImdlp(proposal=1) clf.transform([[1, 2], [3, 4]]) From 9db16d9d3cdc1d3e19c09e6d7e09cc73c759f9a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 20 Dec 2022 01:11:39 +0100 Subject: [PATCH 4/5] feat: :sparkles: Add version method to cppfimdlp --- .gitignore | 9 +++++---- src/fimdlp/cfimdlp.pyx | 5 ++++- src/fimdlp/mdlp.py | 12 +++++++++--- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index fc73bb7..2d86bc8 100644 --- a/.gitignore +++ b/.gitignore @@ -33,8 +33,8 @@ MANIFEST *.manifest *.spec -# Installer log2s -pip-log2.txt +# Installer logs +pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports @@ -56,7 +56,7 @@ coverage.xml *.pot # Django stuff: -*.log2 +*.log local_settings.py db.sqlite3 db.sqlite3-journal @@ -134,4 +134,5 @@ cmake-build-debug cmake-build-debug/** **/lcoverage/** **/x/* -**/*.so \ No newline at end of file +**/*.so +**/CMakeFiles \ No newline at end of file diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx index 83812ac..25a55dc 100644 --- a/src/fimdlp/cfimdlp.pyx +++ b/src/fimdlp/cfimdlp.pyx @@ -1,7 +1,7 @@ # distutils: language = c++ # cython: language_level = 3 from libcpp.vector cimport vector -from libcpp cimport bool +from libcpp.string cimport string cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp": ctypedef float precision_t @@ -9,6 +9,7 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp": CPPFImdlp(int) except + CPPFImdlp& fit(vector[precision_t]&, vector[int]&) vector[precision_t] getCutPoints() + string version() cdef class CFImdlp: @@ -22,4 +23,6 @@ cdef class CFImdlp: return self def get_cut_points(self): return self.thisptr.getCutPoints() + def get_version(self): + return self.thisptr.version() diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py index 86441e7..92db4dc 100644 --- a/src/fimdlp/mdlp.py +++ b/src/fimdlp/mdlp.py @@ -99,9 +99,15 @@ class FImdlp(TransformerMixin, BaseEstimator): return self def _fit_discretizer(self, feature): - self.discretizer_[feature] = CFImdlp(proposal=self.proposal) - self.discretizer_[feature].fit(self.X_[:, feature], self.y_) - self.cut_points_[feature] = self.discretizer_[feature].get_cut_points() + if feature in self.features_: + self.discretizer_[feature] = CFImdlp(proposal=self.proposal) + self.discretizer_[feature].fit(self.X_[:, feature], self.y_) + self.cut_points_[feature] = self.discretizer_[ + feature + ].get_cut_points() + else: + self.discretizer_[feature] = None + self.cut_points_[feature] = [] def _discretize_feature(self, feature, X, result): if feature in self.features_: From 2775698063c3fbc84454d173094d92f641617445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Wed, 21 Dec 2022 19:05:24 +0100 Subject: [PATCH 5/5] test: :zap: --- README.md | 4 +- samples/ArffFiles.cpp | 117 -------------------------------- samples/ArffFiles.h | 28 -------- samples/CMakeLists.txt | 2 +- samples/sample.cpp | 2 +- samples/sample.py | 6 +- src/cppmdlp | 2 +- src/fimdlp/cfimdlp.pyx | 4 +- src/fimdlp/mdlp.py | 20 +++--- src/fimdlp/tests/FImdlp_test.py | 95 ++++++++++++++------------ 10 files changed, 71 insertions(+), 209 deletions(-) delete mode 100644 samples/ArffFiles.cpp delete mode 100644 samples/ArffFiles.h diff --git a/README.md b/README.md index f1033d2..0fd8bd9 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ git clone --recurse-submodules https://github.com/doctorado-ml/FImdlp.git ```bash pip install -e . -python samples/sample.py iris --original -python samples/sample.py iris --proposal +python samples/sample.py iris +python samples/sample.py iris --alternative python samples/sample.py -h # for more options ``` diff --git a/samples/ArffFiles.cpp b/samples/ArffFiles.cpp deleted file mode 100644 index 9baf861..0000000 --- a/samples/ArffFiles.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include "ArffFiles.h" - -#include -#include -#include -#include - -using namespace std; - -ArffFiles::ArffFiles() -{ -} -vector ArffFiles::getLines() -{ - return lines; -} -unsigned long int ArffFiles::getSize() -{ - return lines.size(); -} -vector> ArffFiles::getAttributes() -{ - return attributes; -} -string ArffFiles::getClassName() -{ - return className; -} -string ArffFiles::getClassType() -{ - return classType; -} -vector>& ArffFiles::getX() -{ - return X; -} -vector& ArffFiles::getY() -{ - return y; -} -void ArffFiles::load(string fileName, bool classLast) -{ - ifstream file(fileName); - string keyword, attribute, type; - if (file.is_open()) { - string line; - while (getline(file, line)) { - if (line[0] == '%' || line.empty() || line == "\r" || line == " ") { - continue; - } - if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { - stringstream ss(line); - ss >> keyword >> attribute >> type; - attributes.push_back(make_tuple(attribute, type)); - continue; - } - if (line[0] == '@') { - continue; - } - lines.push_back(line); - } - file.close(); - if (attributes.empty()) - throw invalid_argument("No attributes found"); - if (classLast) { - className = get<0>(attributes.back()); - classType = get<1>(attributes.back()); - attributes.pop_back(); - } else { - className = get<0>(attributes.front()); - classType = get<1>(attributes.front()); - attributes.erase(attributes.begin()); - } - generateDataset(classLast); - } else - throw invalid_argument("Unable to open file"); -} -void ArffFiles::generateDataset(bool classLast) -{ - X = vector>(attributes.size(), vector(lines.size())); - vector yy = vector(lines.size(), ""); - int labelIndex = classLast ? attributes.size() : 0; - for (int i = 0; i < lines.size(); i++) { - stringstream ss(lines[i]); - string value; - int pos = 0, xIndex = 0; - while (getline(ss, value, ',')) { - if (pos++ == labelIndex) { - yy[i] = value; - } else { - X[xIndex++][i] = stof(value); - } - } - } - y = factorize(yy); -} -string ArffFiles::trim(const string& source) -{ - string s(source); - s.erase(0, s.find_first_not_of(" \n\r\t")); - s.erase(s.find_last_not_of(" \n\r\t") + 1); - return s; -} -vector ArffFiles::factorize(const vector& labels_t) -{ - vector yy; - yy.reserve(labels_t.size()); - map labelMap; - int i = 0; - for (string label : labels_t) { - if (labelMap.find(label) == labelMap.end()) { - labelMap[label] = i++; - } - yy.push_back(labelMap[label]); - } - return yy; -} \ No newline at end of file diff --git a/samples/ArffFiles.h b/samples/ArffFiles.h deleted file mode 100644 index 6986d3b..0000000 --- a/samples/ArffFiles.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef ARFFFILES_H -#define ARFFFILES_H -#include -#include -#include -using namespace std; -class ArffFiles { -private: - vector lines; - vector> attributes; - string className, classType; - vector> X; - vector y; - void generateDataset(bool); -public: - ArffFiles(); - void load(string, bool = true); - vector getLines(); - unsigned long int getSize(); - string getClassName(); - string getClassType(); - string trim(const string&); - vector>& getX(); - vector& getY(); - vector> getAttributes(); - vector factorize(const vector& labels_t); -}; -#endif \ No newline at end of file diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 5a67e6e..3f41728 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -3,4 +3,4 @@ project(main) set(CMAKE_CXX_STANDARD 14) -add_executable(sample sample.cpp ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp) +add_executable(sample sample.cpp ../src/cppmdlp/tests/ArffFiles.cpp ../src/cppmdlp/Metrics.cpp ../src/cppmdlp/CPPFImdlp.cpp) diff --git a/samples/sample.cpp b/samples/sample.cpp index 9f18db0..7410445 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -1,4 +1,4 @@ -#include "ArffFiles.h" +#include "../src/cppmdlp/tests/ArffFiles.h" #include #include #include diff --git a/samples/sample.py b/samples/sample.py index 7ab2a19..b02bb32 100644 --- a/samples/sample.py +++ b/samples/sample.py @@ -14,10 +14,8 @@ datasets = { } ap = argparse.ArgumentParser() -ap.add_argument("--proposal", action="store_const", const=1) -ap.add_argument("--original", dest="proposal", action="store_const", const=0) ap.add_argument( - "--alternative", dest="proposal", action="store_const", const=2 + "--alternative", dest="proposal", action="store_const", const=1 ) ap.add_argument("dataset", type=str, choices=datasets.keys()) args = ap.parse_args() @@ -32,7 +30,7 @@ class_name = df.columns.to_list()[class_column] X = df.drop(class_name, axis=1) y, _ = pd.factorize(df[class_name]) X = X.to_numpy() -test = FImdlp(proposal=args.proposal) +test = FImdlp(algorithm=args.proposal if args.proposal is not None else 0) now = time.time() test.fit(X, y) fit_time = time.time() diff --git a/src/cppmdlp b/src/cppmdlp index 50543e4..35c532b 160000 --- a/src/cppmdlp +++ b/src/cppmdlp @@ -1 +1 @@ -Subproject commit 50543e492125754875becba45d1bd7d66ac88a7a +Subproject commit 35c532bf1df53e02473b96e35a248107845e488e diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx index 25a55dc..dac8cfc 100644 --- a/src/fimdlp/cfimdlp.pyx +++ b/src/fimdlp/cfimdlp.pyx @@ -14,8 +14,8 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp": cdef class CFImdlp: cdef CPPFImdlp *thisptr - def __cinit__(self, proposal): - self.thisptr = new CPPFImdlp(proposal) + def __cinit__(self, algorithm): + self.thisptr = new CPPFImdlp(algorithm) def __dealloc__(self): del self.thisptr def fit(self, X, y): diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py index 92db4dc..2ea504b 100644 --- a/src/fimdlp/mdlp.py +++ b/src/fimdlp/mdlp.py @@ -7,23 +7,22 @@ from joblib import Parallel, delayed class FImdlp(TransformerMixin, BaseEstimator): - def __init__(self, n_jobs=-1, proposal=0): + def __init__(self, algorithm=0, n_jobs=-1): + self.algorithm = algorithm self.n_jobs = n_jobs - self.proposal = proposal """Fayyad - Irani MDLP discretization algorithm based implementation. Parameters ---------- + algorithm : int, default=0 + The type of algorithm to use computing the cut points. + 0 - Definitive implementation + 1 - Alternative proposal n_jobs : int, default=-1 The number of jobs to run in parallel. :meth:`fit` and :meth:`transform`, are parallelized over the features. ``-1`` means using all cores available. - proposal : int, default=0 - The type of algorithm to use computing the cut points. - 0 - Normal implementation - 1 - JA Proposal - 2 - Original proposal Attributes ---------- @@ -100,7 +99,7 @@ class FImdlp(TransformerMixin, BaseEstimator): def _fit_discretizer(self, feature): if feature in self.features_: - self.discretizer_[feature] = CFImdlp(proposal=self.proposal) + self.discretizer_[feature] = CFImdlp(algorithm=self.algorithm) self.discretizer_[feature].fit(self.X_[:, feature], self.y_) self.cut_points_[feature] = self.discretizer_[ feature @@ -136,7 +135,10 @@ class FImdlp(TransformerMixin, BaseEstimator): raise ValueError( "Shape of input is different from what was seen in `fit`" ) - result = np.zeros_like(X, dtype=np.int32) - 1 + if len(self.features_) == self.n_features_: + result = np.zeros_like(X, dtype=np.int32) - 1 + else: + result = np.zeros_like(X) - 1 Parallel(n_jobs=self.n_jobs, prefer="threads")( delayed(self._discretize_feature)(feature, X[:, feature], result) for feature in range(self.n_features_) diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py index df8da03..99c5864 100644 --- a/src/fimdlp/tests/FImdlp_test.py +++ b/src/fimdlp/tests/FImdlp_test.py @@ -14,47 +14,41 @@ class FImdlpTest(unittest.TestCase): def test_init(self): clf = FImdlp() self.assertEqual(-1, clf.n_jobs) - self.assertEqual(0, clf.proposal) - clf = FImdlp(proposal=1, n_jobs=7) - self.assertEqual(1, clf.proposal) + self.assertEqual(0, clf.algorithm) + clf = FImdlp(algorithm=1, n_jobs=7) + self.assertEqual(1, clf.algorithm) self.assertEqual(7, clf.n_jobs) - def test_fit_proposal(self): - clf = FImdlp(proposal=1) + def test_fit_definitive(self): + clf = FImdlp(algorithm=0) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual(clf.n_features_, 2) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) self.assertListEqual(clf.y_.tolist(), [1, 2]) - self.assertListEqual([[], []], clf.get_cut_points()) + self.assertListEqual([[2.0], [3.0]], clf.get_cut_points()) X, y = load_iris(return_X_y=True) clf.fit(X, y) self.assertEqual(clf.n_features_, 4) self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(y, clf.y_)) expected = [ - [ - 4.900000095367432, - 5.0, - 5.099999904632568, - 5.400000095367432, - 5.699999809265137, - ], - [2.6999998092651367, 2.9000000953674316, 3.1999998092651367], - [2.3499999046325684, 4.5, 4.800000190734863], - [0.75, 1.399999976158142, 1.5, 1.7000000476837158], + [5.449999809265137, 6.25], + [2.8499999046325684, 3.0, 3.049999952316284, 3.3499999046325684], + [2.450000047683716, 4.75, 5.050000190734863], + [0.800000011920929, 1.4500000476837158, 1.75], ] self.assertListEqual(expected, clf.get_cut_points()) self.assertListEqual([0, 1, 2, 3], clf.features_) clf.fit(X, y, features=[0, 2, 3]) self.assertListEqual([0, 2, 3], clf.features_) - def test_fit_original(self): - clf = FImdlp(proposal=0) + def test_fit_alternative(self): + clf = FImdlp(algorithm=1) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual(clf.n_features_, 2) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) self.assertListEqual(clf.y_.tolist(), [1, 2]) - self.assertListEqual([[], []], clf.get_cut_points()) + self.assertListEqual([[2], [3]], clf.get_cut_points()) X, y = load_iris(return_X_y=True) clf.fit(X, y) self.assertEqual(clf.n_features_, 4) @@ -62,10 +56,10 @@ class FImdlpTest(unittest.TestCase): self.assertTrue(np.array_equal(y, clf.y_)) expected = [ - [5.5, 5.800000190734863], - [2.9000000953674316, 3.3499999046325684], - [2.450000047683716, 4.800000190734863], - [0.800000011920929, 1.7999999523162842], + [5.449999809265137, 5.75], + [2.8499999046325684, 3.3499999046325684], + [2.450000047683716, 4.75], + [0.800000011920929, 1.75], ] self.assertListEqual(expected, clf.get_cut_points()) self.assertListEqual([0, 1, 2, 3], clf.features_) @@ -89,45 +83,58 @@ class FImdlpTest(unittest.TestCase): def test_fit_features(self): clf = FImdlp() - clf.fit([[1, 2], [3, 4]], [1, 2], features=[0]) - res = clf.transform([[1, 2], [3, 4]]) - self.assertListEqual(res.tolist(), [[0, 2], [0, 4]]) + clf.fit([[1, -2], [3, 4]], [1, 2], features=[0]) + res = clf.transform([[1, -2], [3, 4]]) + self.assertListEqual(res.tolist(), [[0, -2], [1, 4]]) + X, y = load_iris(return_X_y=True) + X_expected = X[:, [0, 2]].copy() + clf.fit(X, y, features=[1, 3]) + X_computed = clf.transform(X) + self.assertListEqual( + X_expected[:, 0].tolist(), X_computed[:, 0].tolist() + ) + self.assertListEqual( + X_expected[:, 1].tolist(), X_computed[:, 2].tolist() + ) + self.assertEqual(X_computed.dtype, np.float64) - def test_transform_original(self): - clf = FImdlp(proposal=0) + def test_transform_definitive(self): + clf = FImdlp(algorithm=0) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual( - clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [0, 0]] + clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [1, 1]] ) X, y = load_iris(return_X_y=True) clf.fit(X, y) self.assertEqual(clf.n_features_, 4) self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(y, clf.y_)) + X_transformed = clf.transform(X) self.assertListEqual( - clf.transform(X).tolist(), clf.fit(X, y).transform(X).tolist() + X_transformed.tolist(), clf.fit(X, y).transform(X).tolist() ) + self.assertEqual(X_transformed.dtype, np.int32) expected = [ - [0, 0, 1, 1], - [2, 1, 1, 1], + [1, 0, 1, 1], + [1, 1, 1, 1], [1, 0, 1, 1], [0, 0, 1, 1], [1, 0, 1, 1], [1, 1, 1, 1], - [1, 0, 1, 1], + [1, 1, 1, 1], ] self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected)) with self.assertRaises(ValueError): clf.transform([[1, 2, 3], [4, 5, 6]]) with self.assertRaises(sklearn.exceptions.NotFittedError): - clf = FImdlp(proposal=0) + clf = FImdlp(algorithm=0) clf.transform([[1, 2], [3, 4]]) - def test_transform_proposal(self): - clf = FImdlp(proposal=1) + def test_transform_alternative(self): + clf = FImdlp(algorithm=1) clf.fit([[1, 2], [3, 4]], [1, 2]) self.assertEqual( - clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [0, 0]] + clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [1, 1]] ) X, y = load_iris(return_X_y=True) clf.fit(X, y) @@ -138,17 +145,17 @@ class FImdlpTest(unittest.TestCase): clf.transform(X).tolist(), clf.fit(X, y).transform(X).tolist() ) expected = [ - [4, 0, 1, 1], - [5, 2, 2, 2], - [5, 0, 1, 1], [1, 0, 1, 1], - [4, 1, 1, 1], - [5, 2, 1, 1], - [5, 1, 1, 1], + [2, 1, 1, 1], + [2, 0, 1, 1], + [0, 0, 1, 1], + [1, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], ] self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected)) with self.assertRaises(ValueError): clf.transform([[1, 2, 3], [4, 5, 6]]) with self.assertRaises(sklearn.exceptions.NotFittedError): - clf = FImdlp(proposal=1) + clf = FImdlp(algorithm=1) clf.transform([[1, 2], [3, 4]])