diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..4c5de3f --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,50 @@ +name: CI + +on: + push: + branches: [master] + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [macos-latest, ubuntu-latest, windows-latest] + python: [3.9, "3.10"] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install dependencies + run: | + pip install -q --upgrade pip + pip install -q --upgrade codecov coverage black flake8 codacy-coverage + - name: Build and install + run: | + cd FImdlp + make install + - name: Lint + run: | + black --check --diff src + flake8 --count --per-file-ignores="__init__.py:F401" src + - name: Tests + run: | + coverage run -m unittest discover -v -s src + coverage xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./coverage.xml + - name: Run codacy-coverage-reporter + if: runner.os == 'Linux' + uses: codacy/codacy-coverage-reporter-action@master + with: + project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} + coverage-reports: coverage.xml diff --git a/.gitignore b/.gitignore index 0d1e804..fc73bb7 100644 --- a/.gitignore +++ b/.gitignore @@ -133,4 +133,5 @@ cfimdlp.cpp cmake-build-debug cmake-build-debug/** **/lcoverage/** -**/x/* \ No newline at end of file +**/x/* +**/*.so \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e488052 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "fimdlp/cppmdlp"] + path = src/cppfimdlp + url = https://github.com/rmontanana/mdlp diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 4d6a74c..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include 
fimdlp/CPPFImdlp.h diff --git a/Makefile b/Makefile index 8301143..3e37057 100644 --- a/Makefile +++ b/Makefile @@ -1,44 +1,34 @@ SHELL := /bin/bash .DEFAULT_GOAL := help -.PHONY: coverage deps help lint push test doc build +.PHONY: coverage deps help lint push test build install audit clean: ## Clean up - rm -rf build dist *.egg-info - if [ -f fimdlp/cfimdlp.cpp ]; then rm fimdlp/cfimdlp.cpp; fi; - if [ -f fimdlp/cppfimdlp.cpython-310-darwin.so ]; then rm fimdlp/cppfimdlp.cpython-310-darwin.so; fi; - if [ -d fimdlp/testcpp/build ]; then rm -fr fimdlp/testcpp/build/* ; fi; - if [ -d fimdlp/testcpp/lcoverage ]; then rm -fr fimdlp/testcpp/lcoverage/* ; fi; + rm -rf build dist src/*.egg-info + if [ -f src/fimdlp/cfimdlp.cpp ]; then rm src/fimdlp/cfimdlp.cpp; fi; + for file in src/fimdlp/*.so; do \ + if [ -f $${file} ]; then rm $${file}; fi; \ + done test: - coverage run -m unittest -v fimdlp.tests - cd fimdlp/testcpp && ./test - + coverage run -m unittest discover -v -s src coverage: - if [ -d fimdlp/testcpp/build/CMakeFiles ]; then rm -fr fimdlp/testcpp/build/CMakeFiles/* ; fi; make test - cd fimdlp/testcpp && ./cover coverage report -m lint: ## Lint and static-check - black fimdlp - flake8 fimdlp + black src + flake8 --per-file-ignores="__init__.py:F401" src push: ## Push code with tags git push && git push --tags build: ## Build package - rm -fr dist/* - rm -fr build/* - python -m build - -buildext: ## Build extension - rm -fr dist/* - rm -fr build/* make clean - python setup.py build_ext - echo "Build extension success" - if [ -f build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so ] ; then mv build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so fimdlp; fi - if [ -f build/lib.macosx-10.9-universal2-3.10/cppfimdlp.cpython-310-darwin.so ] ; then mv build/lib.macosx-10.9-universal2-3.10/cppfimdlp.cpython-310-darwin.so fimdlp; fi + python -m build --wheel + +install: ## Build extension + make clean + pip install -e . 
audit: ## Audit pip pip-audit diff --git a/README.md b/README.md index 906273f..0d92dd7 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,31 @@ # FImdlp -Fayyad - Irani MDLP discretization algorithm +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/8b4d784fee13401588aa8c06532a2f6d)](https://www.codacy.com/gh/Doctorado-ML/FImdlp/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/FImdlp&utm_campaign=Badge_Grade) + +Discretization algorithm based on the paper by Usama M. Fayyad and Keki B. Irani + +``` +Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning. In Proceedings of the 13th International Joint Conference on Artificial Intelligence (IJCAI-95), pages 1022-1027, Montreal, Canada, August 1995. +``` ## Build and usage sample +### Python sample + ```bash -python setup.py build_ext --inplace +pip install -e . python samples/sample.py iris --original python samples/sample.py iris --proposal python samples/sample.py -h # for more options ``` + +### C++ sample + +```bash +cd samples +mkdir build +cd build +cmake .. 
+make +./sample iris +``` diff --git a/cppfimdlp.cpython-310-darwin.so b/cppfimdlp.cpython-310-darwin.so deleted file mode 100755 index d246baf..0000000 Binary files a/cppfimdlp.cpython-310-darwin.so and /dev/null differ diff --git a/debug.cpp b/debug.cpp deleted file mode 100644 index 4bab3e8..0000000 --- a/debug.cpp +++ /dev/null @@ -1,273 +0,0 @@ -std::cout << "+++++++++++++++++++++++" << std::endl; -for (size_t i = 0; i < y.size(); i++) { - printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]); -} -std::cout << "+++++++++++++++++++++++" << std::endl; - -std::cout << "Information Gain:" << std::endl; -auto nc = Metrics::numClasses(y, indices, 0, indices.size()); -for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint) { - std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl; - // << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl; -} - -def test(self): - print("Calculating cut points in python for first feature") - yz = self.y_.copy() - xz = X[:, 0].copy() - xz = xz[np.argsort(X[:, 0])] - yz = yz[np.argsort(X[:, 0])] - cuts = [] - for i in range(1, len(yz)): - if yz[i] != yz[i - 1] and xz[i - 1] < xz[i] : - print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})") - cuts.append((xz[i] + xz[i - 1]) / 2) - print("Cuts calculados en python: ", cuts) - print("-- Cuts calculados en C++ --") - print("Cut points for each feature in Iris dataset:") - for i in range(0, 1): - # datax = self.X_[np.argsort(self.X_[:, i]), i] - # y_ = self.y_[np.argsort(self.X_[:, i])] - datax = self.X_[:, i] - y_ = self.y_ - self.discretizer_.fit(datax, y_) - Xcutpoints = self.discretizer_.get_cut_points() - print( - f"New ({len(Xcutpoints)}):{self.features_[i]:20s}: " - f"{[i['toValue'] for i in Xcutpoints]}" - ) - X_translated = [ - f"{i['classNumber']} - ({i['start']}, {i['end']}) - " - f"({i['fromValue']}, {i['toValue']})" - for i in 
Xcutpoints - ] - print(X_translated) - print("*******************************") - print("Disretized values:") - print(self.discretizer_.get_discretized_values()) - print("*******************************") - return X - - c++ - i: 0 4.3, 0 - i : 1 4.4, 0 - i : 2 4.4, 0 - i : 3 4.4, 0 - i : 4 4.5, 0 - i : 5 4.6, 0 - i : 6 4.6, 0 - i : 7 4.6, 0 - i : 8 4.6, 0 - i : 9 4.7, 0 - i : 10 4.7, 0 - i : 11 4.8, 0 - i : 12 4.8, 0 - i : 13 4.8, 0 - i : 14 4.8, 0 - i : 15 4.8, 0 - i : 16 4.9, 0 - i : 17 4.9, 0 - i : 18 4.9, 0 - i : 19 4.9, 0 - i : 20 4.9, 1 - - python - i : 0 4.3 0 - i : 1 4.4 0 - i : 2 4.4 0 - i : 3 4.4 0 - i : 4 4.5 0 - i : 5 4.6 0 - i : 6 4.6 0 - i : 7 4.6 0 - i : 8 4.6 0 - i : 9 4.7 0 - i : 10 4.7 0 - i : 11 4.8 0 - i : 12 4.8 0 - i : 13 4.8 0 - i : 14 4.8 0 - i : 15 4.8 0 - i : 16 4.9 1 - i : 17 4.9 2 - i : 18 4.9 0 - i : 19 4.9 0 - i : 20 4.9 0 - - - - idx: 20 entropy_left : 0 entropy_right : 0.488187 -> 0 150 - idx : 21 entropy_left : 0.0670374 entropy_right : 0.489381 -> 0 150 - idx : 22 entropy_left : 0.125003 entropy_right : 0.490573 -> 0 150 - idx : 24 entropy_left : 0.11507 entropy_right : 0.482206 -> 0 150 - idx : 25 entropy_left : 0.162294 entropy_right : 0.483488 -> 0 150 - idx : 29 entropy_left : 0.141244 entropy_right : 0.462922 -> 0 150 - idx : 30 entropy_left : 0.178924 entropy_right : 0.464386 -> 0 150 - idx : 33 entropy_left : 0.163818 entropy_right : 0.444778 -> 0 150 - idx : 34 entropy_left : 0.195735 entropy_right : 0.44637 -> 0 150 - idx : 44 entropy_left : 0.154253 entropy_right : 0.339183 -> 0 150 - idx : 45 entropy_left : 0.178924 entropy_right : 0.34098 -> 0 150 - idx : 51 entropy_left : 0.159328 entropy_right : 0.217547 -> 0 150 - idx : 52 entropy_left : 0.180508 entropy_right : 0.219019 -> 0 150 - idx : 53 entropy_left : 0.177368 entropy_right : 0.189687 -> 0 150 - idx : 58 entropy_left : 0.265229 entropy_right : 0.196677 -> 0 150 - idx : 59 entropy_left : 0.261331 entropy_right : 0.162291 -> 0 150 - idx : 61 entropy_left : 0.289819 
entropy_right : 0.164857 -> 0 150 - idx : 62 entropy_left : 0.302928 entropy_right : 0.166175 -> 0 150 - idx : 68 entropy_left : 0.36831 entropy_right : 0.174607 -> 0 150 - idx : 69 entropy_left : 0.364217 entropy_right : 0.131848 -> 0 150 - idx : 70 entropy_left : 0.373248 entropy_right : 0.133048 -> 0 150 - idx : 71 entropy_left : 0.381826 entropy_right : 0.134273 -> 0 150 - idx : 72 entropy_left : 0.377855 entropy_right : 0.0805821 -> 0 150 - idx : 74 entropy_left : 0.393817 entropy_right : 0.0822096 -> 0 150 - idx : 75 entropy_left : 0.401218 entropy_right : 0.0830509 -> 0 150 - idx : 76 entropy_left : 0.397415 entropy_right : 0 -> 0 150 - idx : 77 entropy_left : 0.4045 entropy_right : 0 -> 0 150 - idx : 78 entropy_left : 0.411247 entropy_right : 0 -> 0 150 - idx : 79 entropy_left : 0.417674 entropy_right : 0 -> 0 150 - idx : 81 entropy_left : 0.429626 entropy_right : 0 -> 0 150 - idx : 83 entropy_left : 0.440472 entropy_right : 0 -> 0 150 - idx : 84 entropy_left : 0.445513 entropy_right : 0 -> 0 150 - idx : 87 entropy_left : 0.459246 entropy_right : 0 -> 0 150 - idx : 88 entropy_left : 0.463395 entropy_right : 0 -> 0 150 - idx : 89 entropy_left : 0.467347 entropy_right : 0 -> 0 150 - idx : 91 entropy_left : 0.474691 entropy_right : 0 -> 0 150 - idx : 95 entropy_left : 0.487368 entropy_right : 0 -> 0 150 - idx : 97 entropy_left : 0.492813 entropy_right : 0 -> 0 150 - idx : 99 entropy_left : 0.497728 entropy_right : 0 -> 0 150 - idx : 101 entropy_left : 0.502156 entropy_right : 0 -> 0 150 - idx : 102 entropy_left : 0.504201 entropy_right : 0 -> 0 150 - idx : 104 entropy_left : 0.507973 entropy_right : 0 -> 0 150 - idx : 105 entropy_left : 0.509709 entropy_right : 0 -> 0 150 - idx : 106 entropy_left : 0.511351 entropy_right : 0 -> 0 150 - idx : 107 entropy_left : 0.512902 entropy_right : 0 -> 0 150 - idx : 109 entropy_left : 0.515747 entropy_right : 0 -> 0 150 - idx : 110 entropy_left : 0.517047 entropy_right : 0 -> 0 150 - idx : 113 entropy_left : 0.520497 
entropy_right : 0 -> 0 150 - idx : 114 entropy_left : 0.521506 entropy_right : 0 -> 0 150 - idx : 117 entropy_left : 0.524149 entropy_right : 0 -> 0 150 - idx : 118 entropy_left : 0.52491 entropy_right : 0 -> 0 150 - idx : 120 entropy_left : 0.526264 entropy_right : 0 -> 0 150 - idx : 122 entropy_left : 0.52741 entropy_right : 0 -> 0 150 - idx : 127 entropy_left : 0.52946 entropy_right : 0 -> 0 150 - idx : 130 entropy_left : 0.530197 entropy_right : 0 -> 0 150 - idx : 132 entropy_left : 0.530507 entropy_right : 0 -> 0 150 - idx : 133 entropy_left : 0.530611 entropy_right : 0 -> 0 150 - idx : 134 entropy_left : 0.530684 entropy_right : 0 -> 0 150 - idx : 135 entropy_left : 0.530726 entropy_right : 0 -> 0 150 - idx : 137 entropy_left : 0.530721 entropy_right : 0 -> 0 150 - idx : 138 entropy_left : 0.530677 entropy_right : 0 -> 0 150 - cut : 5.5 index : 53 - start : 0 cut : 53 end : 150 - k = 3 k1 = 3 k2 = 3 ent = 0.528321 ent1 = 0.177368 ent2 = 0.189687 - ig = 0.342987 delta = 4.16006 N 150 term 0.0758615 - ¡Ding!5.5 53 - - - idx : 20 entropy_left : 0 entropy_right : 1.5485806065228545 -> 0 150 - idx : 21 entropy_left : 0.2761954276479391 entropy_right : 1.549829505666378 -> 0 150 - idx : 22 entropy_left : 0.5304060778306042 entropy_right : 1.5511852922535474 -> 0 150 - idx : 24 entropy_left : 0.4971501836369671 entropy_right : 1.5419822842863982 -> 0 150 - idx : 25 entropy_left : 0.6395563653739031 entropy_right : 1.5433449229510985 -> 0 150 - idx : 29 entropy_left : 0.574828144380386 entropy_right : 1.5202013991459298 -> 0 150 - idx : 30 entropy_left : 0.6746799231474564 entropy_right : 1.521677608876836 -> 0 150 - idx : 33 entropy_left : 0.6311718053929063 entropy_right : 1.4992098113026513 -> 0 150 - idx : 34 entropy_left : 0.7085966983474103 entropy_right : 1.5007111828980744 -> 0 150 - idx : 44 entropy_left : 0.5928251064639408 entropy_right : 1.3764263022492553 -> 0 150 - idx : 45 entropy_left : 0.6531791627726858 entropy_right : 1.3779796176519241 -> 0 150 - 
idx : 51 entropy_left : 0.5990326006132177 entropy_right : 1.2367928607774141 -> 0 150 - idx : 52 entropy_left : 0.6496096346956632 entropy_right : 1.2377158231343603 -> 0 150 - idx : 53 entropy_left : 0.6412482850735854 entropy_right : 1.2046986815511866 -> 0 150 - idx : 58 entropy_left : 0.8211258609270055 entropy_right : 1.2056112071736118 -> 0 150 - idx : 59 entropy_left : 0.8128223064150747 entropy_right : 1.167065448996099 -> 0 150 - idx : 61 entropy_left : 0.8623538561746379 entropy_right : 1.1653351793699953 -> 0 150 - idx : 62 entropy_left : 0.9353028851500502 entropy_right : 1.1687172769890006 -> 0 150 - idx : 68 entropy_left : 1.031929035599206 entropy_right : 1.1573913563403753 -> 0 150 - idx : 69 entropy_left : 1.0246284743137688 entropy_right : 1.109500797247481 -> 0 150 - idx : 70 entropy_left : 1.036186417911213 entropy_right : 1.105866621101474 -> 0 150 - idx : 71 entropy_left : 1.0895830429620594 entropy_right : 1.1104593064416028 -> 0 150 - idx : 72 entropy_left : 1.0822273380873693 entropy_right : 1.0511407586429597 -> 0 150 - idx : 74 entropy_left : 1.1015727511177442 entropy_right : 1.041722068095403 -> 0 150 - idx : 75 entropy_left : 1.1457749842070042 entropy_right : 1.0462881865460743 -> 0 150 - idx : 76 entropy_left : 1.1387129726704701 entropy_right : 0.9568886656798212 -> 0 150 - idx : 77 entropy_left : 1.1468549240968817 entropy_right : 0.9505668528932196 -> 0 150 - idx : 78 entropy_left : 1.1848333092150132 entropy_right : 0.9544340029249649 -> 0 150 - idx : 79 entropy_left : 1.1918623939938016 entropy_right : 0.9477073729342066 -> 0 150 - idx : 81 entropy_left : 1.2548698305334247 entropy_right : 0.9557589912150009 -> 0 150 - idx : 83 entropy_left : 1.2659342914094807 entropy_right : 0.9411864371816835 -> 0 150 - idx : 84 entropy_left : 1.2922669208691815 entropy_right : 0.9456603046006402 -> 0 150 - idx : 87 entropy_left : 1.3041589171425696 entropy_right : 0.9182958340544896 -> 0 150 - idx : 88 entropy_left : 1.327572716814381 
entropy_right : 0.9235785996175947 -> 0 150 - idx : 89 entropy_left : 1.330465426809402 entropy_right : 0.9127341558073343 -> 0 150 - idx : 91 entropy_left : 1.3709454625942779 entropy_right : 0.9238422284571814 -> 0 150 - idx : 95 entropy_left : 1.378063041001916 entropy_right : 0.8698926856041563 -> 0 150 - idx : 97 entropy_left : 1.4115390027326744 entropy_right : 0.8835850861052532 -> 0 150 - idx : 99 entropy_left : 1.4130351465796736 entropy_right : 0.8478617451660526 -> 0 150 - idx : 101 entropy_left : 1.4412464483479606 entropy_right : 0.863120568566631 -> 0 150 - idx : 102 entropy_left : 1.4415827640191903 entropy_right : 0.8426578772022391 -> 0 150 - idx : 104 entropy_left : 1.4655411381577925 entropy_right : 0.8589810370425963 -> 0 150 - idx : 105 entropy_left : 1.465665295753282 entropy_right : 0.8366407419411673 -> 0 150 - idx : 106 entropy_left : 1.4762911618692924 entropy_right : 0.8453509366224365 -> 0 150 - idx : 107 entropy_left : 1.4762132849962355 entropy_right : 0.8203636429576732 -> 0 150 - idx : 109 entropy_left : 1.4951379218217782 entropy_right : 0.8390040613676977 -> 0 150 - idx : 110 entropy_left : 1.4949188482339508 entropy_right : 0.8112781244591328 -> 0 150 - idx : 113 entropy_left : 1.5183041104369397 entropy_right : 0.8418521897563207 -> 0 150 - idx : 114 entropy_left : 1.51802714866133 entropy_right : 0.8112781244591328 -> 0 150 - idx : 117 entropy_left : 1.5364854516368571 entropy_right : 0.8453509366224365 -> 0 150 - idx : 118 entropy_left : 1.5361890331151247 entropy_right : 0.8112781244591328 -> 0 150 - idx : 120 entropy_left : 1.5462566034163763 entropy_right : 0.8366407419411673 -> 0 150 - idx : 122 entropy_left : 1.545378825051491 entropy_right : 0.74959525725948 -> 0 150 - idx : 127 entropy_left : 1.5644893588382582 entropy_right : 0.828055725379504 -> 0 150 - idx : 130 entropy_left : 1.562956340286807 entropy_right : 0.6098403047164004 -> 0 150 - idx : 132 entropy_left : 1.5687623685201277 entropy_right : 0.6500224216483541 
-> 0 150 - idx : 133 entropy_left : 1.5680951037987416 entropy_right : 0.5225593745369408 -> 0 150 - idx : 134 entropy_left : 1.5706540443736308 entropy_right : 0.5435644431995964 -> 0 150 - idx : 135 entropy_left : 1.5699201014782036 entropy_right : 0.35335933502142136 -> 0 150 - idx : 137 entropy_left : 1.5744201314186457 entropy_right : 0.39124356362925566 -> 0 150 - idx : 138 entropy_left : 1.5736921054134685 entropy_right : 0 -> 0 150 - ¡Ding!4.9 20 - - k = 2 k1 = 1 k2 = 2 ent = 0.5225593745369408 ent1 = 0 ent2 = 0.5435644431995964 - ig = 0.010969310349085326 delta = 2.849365059382915 N 17 term 0.4029038270225244 - idx : 135 entropy_left : 0 entropy_right : 0.35335933502142136 -> 134 150 - idx : 137 entropy_left : 0.9182958340544896 entropy_right : 0.39124356362925566 -> 134 150 - idx : 138 entropy_left : 1.0 entropy_right : 0 -> 134 150 - start : 134 cut : 135 end : 150 - k = 2 k1 = 1 k2 = 2 ent = 0.5435644431995964 ent1 = 0 ent2 = 0.35335933502142136 - ig = 0.21229006661701388 delta = 2.426944705701254 N 16 term 0.39586470633186077 - idx : 137 entropy_left : 0 entropy_right : 0.39124356362925566 -> 135 150 - idx : 138 entropy_left : 0.9182958340544896 entropy_right : 0 -> 135 150 - start : 135 cut : 137 end : 150 - k = 2 k1 = 1 k2 = 2 ent = 0.35335933502142136 ent1 = 0 ent2 = 0.39124356362925566 - ig = 0.01428157987606643 delta = 2.8831233792732727 N 15 term 0.44603188675539174 - idx : 138 entropy_left : 0 entropy_right : 0 -> 137 150 - start : 137 cut : 138 end : 150 - k = 2 k1 = 1 k2 = 1 ent = 0.39124356362925566 ent1 = 0 ent2 = 0 - ig = 0.39124356362925566 delta = 2.0248677947990927 N 13 term 0.4315254073477115 - [[4.9, 5.2, 5.4, 6.75]] - - - cut : 1.4 index : 81 - start : 50 cut : 81 end : 96 - k = 2 k1 = 2 k2 = 1 ent = 0.151097 ent1 = 0.205593 ent2 = 0 - ig = 0.0125455 delta = 2.91635 N 46 term 0.182787 - idx : 80 entropy_left : 0 entropy_right : 0 -> 50 81 - cut : 1.4 index : 80 - start : 50 cut : 80 end : 81 - k = 2 k1 = 1 k2 = 1 ent = 0.205593 ent1 = 
0 ent2 = 0 - ig = 0.205593 delta = 2.39617 N 31 term 0.235583 - idx : 112 entropy_left : 0 entropy_right : 0.175565 -> 103 150 - idx : 113 entropy_left : 0.468996 entropy_right : 0 -> 103 150 - cut : 1.8 index : 112 - start : 103 cut : 112 end : 150 - k = 2 k1 = 1 k2 = 2 ent = 0.148549 ent1 = 0 ent2 = 0.175565 - ig = 0.00660326 delta = 2.86139 N 47 term 0.178403 - idx : 113 entropy_left : 0 entropy_right : 0 -> 112 150 - cut : 1.8 index : 113 - start : 112 cut : 113 end : 150 - k = 2 k1 = 1 k2 = 1 ent = 0.175565 ent1 = 0 ent2 = 0 - ig = 0.175565 delta = 2.45622 N 38 term 0.201728 - [[4.900000095367432, 4.949999809265137, 5.0, 5.099999904632568, 5.199999809265137, 5.25, 5.400000095367432, 5.449999809265137, - 5.5, 5.550000190734863, 5.599999904632568, 5.699999809265137, 5.800000190734863, 5.900000095367432, 5.949999809265137, 6.0, 6.050000190734863, - 6.099999904632568, 6.149999618530273, 6.199999809265137, 6.25, 6.300000190734863, 6.400000095367432, 6.5, 6.550000190734863, 6.649999618530273, 6.699999809265137, - 6.75, 6.800000190734863, 6.850000381469727, 6.900000095367432, 6.949999809265137, 7.050000190734863]] \ No newline at end of file diff --git a/fimdlp/CMakeLists.txt b/fimdlp/CMakeLists.txt deleted file mode 100644 index abd9f8d..0000000 --- a/fimdlp/CMakeLists.txt +++ /dev/null @@ -1,183 +0,0 @@ -cmake_minimum_required(VERSION 3.24) -project(fimdlp) - -set(CMAKE_CXX_STANDARD 14) - -include_directories(.) 
-include_directories(testcpp/build/_deps/googletest-src/googlemock/include) -include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock) -include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal) -include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom) -include_directories(testcpp/build/_deps/googletest-src/googlemock/test) -include_directories(testcpp/build/_deps/googletest-src/googletest/include) -include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest) -include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal) -include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom) -include_directories(testcpp/build/_deps/googletest-src/googletest/samples) -include_directories(testcpp/build/_deps/googletest-src/googletest/src) -include_directories(testcpp/build/_deps/googletest-src/googletest/test) - -add_executable(fimdlp - testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-matchers.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-port.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-internal-utils.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-port.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-pp.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-actions.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-cardinalities.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-function-mocker.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-matchers.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-actions.h - 
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-matchers.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-nice-strict.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-spec-builders.h - testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock.h - testcpp/build/_deps/googletest-src/googlemock/src/gmock-all.cc - testcpp/build/_deps/googletest-src/googlemock/src/gmock-cardinalities.cc - testcpp/build/_deps/googletest-src/googlemock/src/gmock-internal-utils.cc - testcpp/build/_deps/googletest-src/googlemock/src/gmock-matchers.cc - testcpp/build/_deps/googletest-src/googlemock/src/gmock-spec-builders.cc - testcpp/build/_deps/googletest-src/googlemock/src/gmock.cc - testcpp/build/_deps/googletest-src/googlemock/src/gmock_main.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-actions_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-cardinalities_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-function-mocker_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-internal-utils_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-arithmetic_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-comparisons_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-containers_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-misc_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers_test.h - testcpp/build/_deps/googletest-src/googlemock/test/gmock-more-actions_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-nice-strict_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-port_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp-string_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp_test.cc - 
testcpp/build/_deps/googletest-src/googlemock/test/gmock-spec-builders_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_all_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_ex_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_leak_test_.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_link2_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.h - testcpp/build/_deps/googletest-src/googlemock/test/gmock_output_test_.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_stress_test.cc - testcpp/build/_deps/googletest-src/googlemock/test/gmock_test.cc - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-port.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-printers.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-death-test-internal.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-filepath.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-internal.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-param-util.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port-arch.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-string.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-type-util.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-assertion-result.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-death-test.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-matchers.h - 
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-message.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-param-test.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-printers.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-spi.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-test-part.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-typed-test.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_pred_impl.h - testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_prod.h - testcpp/build/_deps/googletest-src/googletest/samples/prime_tables.h - testcpp/build/_deps/googletest-src/googletest/samples/sample1.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample1.h - testcpp/build/_deps/googletest-src/googletest/samples/sample10_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample1_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample2.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample2.h - testcpp/build/_deps/googletest-src/googletest/samples/sample2_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample3-inl.h - testcpp/build/_deps/googletest-src/googletest/samples/sample3_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample4.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample4.h - testcpp/build/_deps/googletest-src/googletest/samples/sample4_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample5_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample6_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample7_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample8_unittest.cc - testcpp/build/_deps/googletest-src/googletest/samples/sample9_unittest.cc - 
testcpp/build/_deps/googletest-src/googletest/src/gtest-all.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-assertion-result.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-death-test.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-filepath.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-internal-inl.h - testcpp/build/_deps/googletest-src/googletest/src/gtest-matchers.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-port.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-printers.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-test-part.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest-typed-test.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest.cc - testcpp/build/_deps/googletest-src/googletest/src/gtest_main.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-break-on-failure-unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-catch-exceptions-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-color-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test_ex_test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-env-var-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-failfast-unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-filepath-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-filter-unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-global-environment-unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-list-tests-unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-listener-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-message-test.cc - 
testcpp/build/_deps/googletest-src/googletest/test/googletest-options-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-output-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name1-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name2-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.h - testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test2-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-port-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-printers-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-setuptestsuite-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-shuffle-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-test-part-test.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-throw-on-failure-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/googletest-uninitialized-test_.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test2_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.h - testcpp/build/_deps/googletest-src/googletest/test/gtest-unittest-api_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_all_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_assert_by_exception_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_environment_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_help_test_.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_list_output_unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_main_unittest.cc - 
testcpp/build/_deps/googletest-src/googletest/test/gtest_no_test_unittest.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_pred_impl_unittest.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_premature_exit_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_prod_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_repeat_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_in_environment_setup_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_sole_header_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_stress_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_test_macro_stack_footprint_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_testbridge_test_.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_throw_on_failure_ex_test.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_unittest.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile1_test_.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile2_test_.cc - testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_output_unittest_.cc - testcpp/build/_deps/googletest-src/googletest/test/production.cc - testcpp/build/_deps/googletest-src/googletest/test/production.h - testcpp/CMakeLists.txt - testcpp/FImdlp_unittest.cc - testcpp/Metrics_unittest.cc - cfimdlp.cpp - CPPFImdlp.cpp - CPPFImdlp.h - Metrics.cpp - Metrics.h - typesFImdlp.h) diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so deleted file mode 100755 index b61ccc0..0000000 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and /dev/null differ diff --git a/fimdlp/testcpp/FImdlp_unittest.cc b/fimdlp/testcpp/FImdlp_unittest.cc deleted file mode 100644 index 1382132..0000000 --- a/fimdlp/testcpp/FImdlp_unittest.cc +++ /dev/null @@ -1,166 +0,0 
@@ -#include "gtest/gtest.h" -#include "../Metrics.h" -#include "../CPPFImdlp.h" -namespace mdlp { - class TestFImdlp: public CPPFImdlp, public testing::Test { - public: - TestFImdlp(): CPPFImdlp(false) {} - void SetUp() - { - // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0] - //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2) - X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; - y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; - fit(X, y); - } - void setProposal(bool value) - { - proposal = value; - } - void initIndices() - { - indices = indices_t(); - } - void checkSortedVector(samples_t& X_, indices_t indices_) - { - X = X_; - indices = indices_; - indices_t testSortedIndices = sortIndices(X); - precision_t prev = X[testSortedIndices[0]]; - for (auto i = 0; i < X.size(); ++i) { - EXPECT_EQ(testSortedIndices[i], indices[i]); - EXPECT_LE(prev, X[testSortedIndices[i]]); - prev = X[testSortedIndices[i]]; - } - } - void checkCutPoints(cutPoints_t& expected) - { - int expectedSize = expected.size(); - EXPECT_EQ(cutPoints.size(), expectedSize); - for (auto i = 0; i < expectedSize; i++) { - EXPECT_EQ(cutPoints[i], expected[i]); - } - } - template - void checkVectors(std::vector const& expected, std::vector const& computed) - { - EXPECT_EQ(expected.size(), computed.size()); - for (auto i = 0; i < expected.size(); i++) { - EXPECT_EQ(expected[i], computed[i]); - } - } - }; - TEST_F(TestFImdlp, FitErrorEmptyDataset) - { - X = samples_t(); - y = labels_t(); - EXPECT_THROW(fit(X, y), std::invalid_argument); - } -} -// -// TEST_F(TestFImdlp, FitErrorDifferentSize) -// { -// X = { 1, 2, 3 }; -// y = { 1, 2 }; -// EXPECT_THROW(fit(X, y), std::invalid_argument); -// } -// TEST_F(TestFImdlp, SortIndices) -// { -// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; -// indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; -// checkSortedVector(X, indices); -// X = { 5.77, 5.88, 5.99 }; -// indices = { 0, 1, 2 }; -// checkSortedVector(X, 
indices); -// X = { 5.33, 5.22, 5.11 }; -// indices = { 2, 1, 0 }; -// checkSortedVector(X, indices); -// } -// TEST_F(TestFImdlp, EvaluateCutPoint) -// { -// cutPoint_t rest, candidate; -// rest = { 0, 10, -1, -1, 1000 }; -// candidate = { 0, 4, -1, -1, 5.15 }; -// EXPECT_FALSE(evaluateCutPoint(rest, candidate)); -// } -// TEST_F(TestFImdlp, ComputeCutPointsOriginal) -// { -// cutPoints_t expected; -// expected = { -// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 }, -// { 6, 10, -1, 5.45, 3.4028234663852886e+38 } -// }; -// setCutPoints(cutPoints_t()); -// computeCutPointsOriginal(); -// checkCutPoints(expected); -// } -// TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) -// { -// cutPoints_t expected; -// expected = { -// { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 }, -// }; -// X = { 0, 1, 2, 2 }; -// y = { 1, 1, 1, 2 }; -// fit(X, y); -// computeCutPointsOriginal(); -// checkCutPoints(expected); -// } -// TEST_F(TestFImdlp, ComputeCutPointsProposal) -// { -// cutPoints_t expected; -// expected = { -// { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 }, -// { 6, 9, -1, 5.4, 5.85 }, -// { 9, 10, -1, 5.85, 3.4028234663852886e+38 } -// }; -// computeCutPointsProposal(); -// checkCutPoints(expected); -// } -// TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) -// { -// cutPoints_t expected; -// expected = { -// { 0, 3, -1, -3.4028234663852886e+38, 1.5 }, -// { 3, 4, -1, 1.5, 3.4028234663852886e+38 } -// }; -// X = { 0, 1, 2, 2 }; -// y = { 1, 1, 1, 2 }; -// fit(X, y); -// computeCutPointsProposal(); -// checkCutPoints(expected); -// } -// TEST_F(TestFImdlp, DiscretizedValues) -// { -// labels_t computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -// computed = getDiscretizedValues(); -// checkVectors(expected, computed); -// } -// TEST_F(TestFImdlp, GetCutPoints) -// { -// samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 }; -// computeCutPointsOriginal(); -// computed = getCutPoints(); -// 
checkVectors(expected, computed); -// } -// TEST_F(TestFImdlp, Constructor) -// { -// samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; -// labels_t y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; -// setProposal(false); -// fit(X, y); -// computeCutPointsOriginal(); -// cutPoints_t expected; -// vector computed = getCutPoints(); -// expected = { -// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 }, -// { 6, 10, -1, 5.45, 3.4028234663852886e+38 } -// }; -// computed = getCutPoints(); -// int expectedSize = expected.size(); -// EXPECT_EQ(computed.size(), expected.size()); -// for (auto i = 0; i < expectedSize; i++) { -// EXPECT_NEAR(computed[i], expected[i].toValue, .00000001); -// } -// } -//} \ No newline at end of file diff --git a/fimdlp/testcpp/xx/CMakeLists.txt b/fimdlp/testcpp/xx/CMakeLists.txt deleted file mode 100644 index 5db4f6b..0000000 --- a/fimdlp/testcpp/xx/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -cmake_minimum_required(VERSION 3.24) -project(main) - -set(CMAKE_CXX_STANDARD 17) - -add_executable(main main.cpp ArffFiles.cpp ../../Metrics.cpp ../../CPPFImdlp.cpp) diff --git a/pyproject.toml b/pyproject.toml index 6617025..8e166b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,8 @@ requires = ["setuptools", "cython", "wheel"] build-backend = "setuptools.build_meta" [tool.setuptools] -packages = ["fimdlp"] +#packages = ["fimdlp"] +package-dir = { "" = "src" } license-files = ["LICENSE"] [tool.setuptools.dynamic] @@ -17,8 +18,8 @@ authors = [ { name = "Ricardo Montañana", email = "ricardo.montanana@alu.uclm.es" }, ] dynamic = ['version'] -dependencies = ["numpy"] -requires-python = ">=3.8" +dependencies = ["numpy", "joblib"] +requires-python = ">=3.9" classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", @@ -30,7 +31,6 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python", - "Programming Language :: Python 
:: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ] @@ -40,7 +40,7 @@ Home = "https://github.com/doctorado-ml/FImdlp" [tool.black] line-length = 79 -target_version = ['py38', 'py39', 'py310'] +target_version = ['py39', 'py310'] include = '\.pyi?$' exclude = ''' /( @@ -55,3 +55,7 @@ exclude = ''' | dist )/ ''' +[tool.flake8] +per-file-ignores = ['__init__.py:F401'] +max-line-length = 79 +count = true diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index df85077..84a8742 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -1,6 +1,6 @@ -cmake_minimum_required(VERSION 3.24) +cmake_minimum_required(VERSION 3.20) project(main) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 14) -add_executable(sample sample.cpp ArffFiles.cpp ../fimdlp/Metrics.cpp ../fimdlp/CPPFImdlp.cpp) +add_executable(sample sample.cpp ArffFiles.cpp ../src/fimdlp/cppmdlp/Metrics.cpp ../src/fimdlp/cppmdlp/CPPFImdlp.cpp) diff --git a/samples/sample.cpp b/samples/sample.cpp index 1a9e407..d838bb9 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -2,7 +2,7 @@ #include #include #include -#include "../fimdlp/CPPFImdlp.h" +#include "../src/cppmdlp/CPPFImdlp.h" using namespace std; @@ -10,7 +10,7 @@ int main(int argc, char** argv) { ArffFiles file; vector lines; - string path = "../fimdlp/testcpp/datasets/"; + string path = "../../src/cppmdlp/tests/datasets/"; map datasets = { {"mfeat-factors", true}, {"iris", true}, diff --git a/samples/sample.py b/samples/sample.py index 6fcf065..e7f5fca 100644 --- a/samples/sample.py +++ b/samples/sample.py @@ -18,9 +18,9 @@ ap.add_argument("--proposal", action="store_true") ap.add_argument("--original", dest="proposal", action="store_false") ap.add_argument("dataset", type=str, choices=datasets.keys()) args = ap.parse_args() -relative = "" if os.path.isdir("fimdlp") else ".." +relative = "" if os.path.isdir("src") else ".." 
file_name = os.path.join( - relative, "fimdlp", "testcpp", "datasets", args.dataset + relative, "src", "cppmdlp", "tests", "datasets", args.dataset ) data = arff.loadarff(file_name + ".arff") df = pd.DataFrame(data[0]) diff --git a/setup.py b/setup.py index b1ce695..db8a696 100644 --- a/setup.py +++ b/setup.py @@ -9,11 +9,11 @@ from setuptools import Extension, setup setup( ext_modules=[ Extension( - name="cppfimdlp", + name="fimdlp.cppfimdlp", sources=[ - "fimdlp/cfimdlp.pyx", - "fimdlp/CPPFImdlp.cpp", - "fimdlp/Metrics.cpp", + "src/fimdlp/cfimdlp.pyx", + "src/cppmdlp/CPPFImdlp.cpp", + "src/cppmdlp/Metrics.cpp", ], language="c++", include_dirs=["fimdlp"], diff --git a/src/cppmdlp/.gitignore b/src/cppmdlp/.gitignore new file mode 100644 index 0000000..fbe8f7d --- /dev/null +++ b/src/cppmdlp/.gitignore @@ -0,0 +1,36 @@ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app +**/build +**/lcoverage +.idea +cmake-* diff --git a/src/cppmdlp/CMakeLists.txt b/src/cppmdlp/CMakeLists.txt new file mode 100644 index 0000000..eefc9c6 --- /dev/null +++ b/src/cppmdlp/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.24) +project(mdlp) + +set(CMAKE_CXX_STANDARD 17) + +add_library(mdlp CPPFImdlp.cpp Metrics.cpp) + diff --git a/fimdlp/CPPFImdlp.cpp b/src/cppmdlp/CPPFImdlp.cpp similarity index 98% rename from fimdlp/CPPFImdlp.cpp rename to src/cppmdlp/CPPFImdlp.cpp index f078ecb..297dfe5 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/src/cppmdlp/CPPFImdlp.cpp @@ -1,11 +1,12 @@ #include #include #include +#include #include "CPPFImdlp.h" #include "Metrics.h" namespace mdlp { - CPPFImdlp::CPPFImdlp(bool proposal):proposal(proposal), indices(indices_t()), y(labels_t()), metrics(Metrics(y, indices)) + CPPFImdlp::CPPFImdlp(bool 
proposal):proposal(proposal), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices)) { } CPPFImdlp::~CPPFImdlp() diff --git a/fimdlp/CPPFImdlp.h b/src/cppmdlp/CPPFImdlp.h similarity index 100% rename from fimdlp/CPPFImdlp.h rename to src/cppmdlp/CPPFImdlp.h diff --git a/src/cppmdlp/LICENSE b/src/cppmdlp/LICENSE new file mode 100644 index 0000000..53f6b48 --- /dev/null +++ b/src/cppmdlp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Ricardo Montañana Gómez + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/fimdlp/Metrics.cpp b/src/cppmdlp/Metrics.cpp similarity index 100% rename from fimdlp/Metrics.cpp rename to src/cppmdlp/Metrics.cpp diff --git a/fimdlp/Metrics.h b/src/cppmdlp/Metrics.h similarity index 100% rename from fimdlp/Metrics.h rename to src/cppmdlp/Metrics.h diff --git a/src/cppmdlp/README.md b/src/cppmdlp/README.md new file mode 100644 index 0000000..56d9a7d --- /dev/null +++ b/src/cppmdlp/README.md @@ -0,0 +1,2 @@ +# mdlp +Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning diff --git a/fimdlp/testcpp/xx/ArffFiles.cpp b/src/cppmdlp/sample/ArffFiles.cpp similarity index 100% rename from fimdlp/testcpp/xx/ArffFiles.cpp rename to src/cppmdlp/sample/ArffFiles.cpp diff --git a/fimdlp/testcpp/xx/ArffFiles.h b/src/cppmdlp/sample/ArffFiles.h similarity index 100% rename from fimdlp/testcpp/xx/ArffFiles.h rename to src/cppmdlp/sample/ArffFiles.h diff --git a/src/cppmdlp/sample/CMakeLists.txt b/src/cppmdlp/sample/CMakeLists.txt new file mode 100644 index 0000000..a6bf846 --- /dev/null +++ b/src/cppmdlp/sample/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.24) +project(main) + +set(CMAKE_CXX_STANDARD 17) + +add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) diff --git a/fimdlp/testcpp/xx/main.cpp b/src/cppmdlp/sample/sample.cpp similarity index 91% rename from fimdlp/testcpp/xx/main.cpp rename to src/cppmdlp/sample/sample.cpp index b9e8cc5..790a2c9 100644 --- a/fimdlp/testcpp/xx/main.cpp +++ b/src/cppmdlp/sample/sample.cpp @@ -2,7 +2,7 @@ #include #include #include -#include "../../CPPFImdlp.h" +#include "../CPPFImdlp.h" using namespace std; @@ -10,7 +10,7 @@ int main(int argc, char** argv) { ArffFiles file; vector lines; - string path = "/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/"; + string path = "../../tests/datasets/"; map datasets = { {"mfeat-factors", true}, {"iris", true}, @@ -41,7 +41,7 
@@ int main(int argc, char** argv) } cout << y[i] << endl; } - mdlp::CPPFImdlp test = mdlp::CPPFImdlp(); + mdlp::CPPFImdlp test = mdlp::CPPFImdlp(false); for (auto i = 0; i < attributes.size(); i++) { cout << "Cut points for " << get<0>(attributes[i]) << endl; cout << "--------------------------" << setprecision(3) << endl; diff --git a/fimdlp/testcpp/.gitignore b/src/cppmdlp/tests/.gitignore similarity index 100% rename from fimdlp/testcpp/.gitignore rename to src/cppmdlp/tests/.gitignore diff --git a/fimdlp/testcpp/CMakeLists.txt b/src/cppmdlp/tests/CMakeLists.txt similarity index 95% rename from fimdlp/testcpp/CMakeLists.txt rename to src/cppmdlp/tests/CMakeLists.txt index 301b926..c27aba4 100644 --- a/fimdlp/testcpp/CMakeLists.txt +++ b/src/cppmdlp/tests/CMakeLists.txt @@ -17,15 +17,14 @@ FetchContent_MakeAvailable(googletest) enable_testing() -add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cc) -add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../Metrics.cpp FImdlp_unittest.cc) +add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp) +add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../Metrics.cpp FImdlp_unittest.cpp) target_link_libraries(Metrics_unittest GTest::gtest_main) target_link_libraries(FImdlp_unittest GTest::gtest_main) target_compile_options(Metrics_unittest PRIVATE --coverage) target_compile_options(FImdlp_unittest PRIVATE --coverage) target_link_options(Metrics_unittest PRIVATE --coverage) target_link_options(FImdlp_unittest PRIVATE --coverage) -# -fprofile-arcs -ftest-coverage include(GoogleTest) gtest_discover_tests(Metrics_unittest) diff --git a/src/cppmdlp/tests/FImdlp_unittest.cpp b/src/cppmdlp/tests/FImdlp_unittest.cpp new file mode 100644 index 0000000..0fc0ae9 --- /dev/null +++ b/src/cppmdlp/tests/FImdlp_unittest.cpp @@ -0,0 +1,141 @@ +#include "gtest/gtest.h" +#include "../Metrics.h" +#include "../CPPFImdlp.h" +#include + +namespace mdlp { + class TestFImdlp : public CPPFImdlp, public testing::Test { + public: 
+ precision_t precision = 0.000001; + + TestFImdlp() : CPPFImdlp(false) {} + + void SetUp() { + // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0] + //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2) + X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9}; + y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; + proposal = false; + fit(X, y); + } + + void setProposal(bool value) { + proposal = value; + } + + // void initIndices() + // { + // indices = indices_t(); + // } + void checkSortedVector() { + indices_t testSortedIndices = sortIndices(X); + precision_t prev = X[testSortedIndices[0]]; + for (auto i = 0; i < X.size(); ++i) { + EXPECT_EQ(testSortedIndices[i], indices[i]); + EXPECT_LE(prev, X[testSortedIndices[i]]); + prev = X[testSortedIndices[i]]; + } + } + + void checkCutPoints(cutPoints_t &expected) { + int expectedSize = expected.size(); + EXPECT_EQ(cutPoints.size(), expectedSize); + for (auto i = 0; i < cutPoints.size(); i++) { + EXPECT_NEAR(cutPoints[i], expected[i], precision); + } + } + + template + void checkVectors(std::vector const &expected, std::vector const &computed) { + EXPECT_EQ(expected.size(), computed.size()); + ASSERT_EQ(expected.size(), computed.size()); + for (auto i = 0; i < expected.size(); i++) { + EXPECT_NEAR(expected[i], computed[i],precision); + } + } + }; + + TEST_F(TestFImdlp, FitErrorEmptyDataset) { + X = samples_t(); + y = labels_t(); + EXPECT_THROW(fit(X, y), std::invalid_argument); + } + + TEST_F(TestFImdlp, FitErrorDifferentSize) { + X = {1, 2, 3}; + y = {1, 2}; + EXPECT_THROW(fit(X, y), std::invalid_argument); + } + + TEST_F(TestFImdlp, SortIndices) { + X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9}; + indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7}; + checkSortedVector(); + X = {5.77, 5.88, 5.99}; + indices = {0, 1, 2}; + checkSortedVector(); + X = {5.33, 5.22, 5.11}; + indices = {2, 1, 0}; + checkSortedVector(); + } + + TEST_F(TestFImdlp, TestDataset) { + proposal = false; + fit(X, y); + 
computeCutPointsOriginal(0, 10); + cutPoints_t expected = {5.6499996185302734}; + vector computed = getCutPoints(); + computed = getCutPoints(); + int expectedSize = expected.size(); + EXPECT_EQ(computed.size(), expected.size()); + for (auto i = 0; i < expectedSize; i++) { + EXPECT_NEAR(computed[i], expected[i], precision); + } + } + + TEST_F(TestFImdlp, ComputeCutPointsOriginal) { + cutPoints_t expected = {5.65}; + proposal = false; + computeCutPointsOriginal(0, 10); + checkCutPoints(expected); + } + + TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) { + cutPoints_t expected; + proposal = false; + expected = {2}; + samples_t X_ = {0, 1, 2, 2}; + labels_t y_ = {1, 1, 1, 2}; + fit(X_, y_); + checkCutPoints(expected); + } + + TEST_F(TestFImdlp, ComputeCutPointsProposal) { + proposal = true; + cutPoints_t expected; + expected = {}; + fit(X, y); + computeCutPointsProposal(); + checkCutPoints(expected); + } + + TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) { + cutPoints_t expected; + expected = {1.5}; + proposal = true; + samples_t X_ = {0, 1, 2, 2}; + labels_t y_ = {1, 1, 1, 2}; + fit(X_, y_); + checkCutPoints(expected); + } + + TEST_F(TestFImdlp, GetCutPoints) { + samples_t computed, expected = {5.65}; + proposal = false; + computeCutPointsOriginal(0, 10); + computed = getCutPoints(); + for (auto item: cutPoints) + cout << setprecision(6) << item << endl; + checkVectors(expected, computed); + } +} diff --git a/fimdlp/testcpp/Metrics_unittest.cc b/src/cppmdlp/tests/Metrics_unittest.cpp similarity index 100% rename from fimdlp/testcpp/Metrics_unittest.cc rename to src/cppmdlp/tests/Metrics_unittest.cpp diff --git a/fimdlp/testcpp/cover b/src/cppmdlp/tests/cover similarity index 100% rename from fimdlp/testcpp/cover rename to src/cppmdlp/tests/cover diff --git a/fimdlp/testcpp/datasets/iris.arff b/src/cppmdlp/tests/datasets/iris.arff similarity index 100% rename from fimdlp/testcpp/datasets/iris.arff rename to src/cppmdlp/tests/datasets/iris.arff diff --git 
a/fimdlp/testcpp/datasets/kdd_JapaneseVowels.arff b/src/cppmdlp/tests/datasets/kdd_JapaneseVowels.arff similarity index 100% rename from fimdlp/testcpp/datasets/kdd_JapaneseVowels.arff rename to src/cppmdlp/tests/datasets/kdd_JapaneseVowels.arff diff --git a/fimdlp/testcpp/datasets/letter.arff b/src/cppmdlp/tests/datasets/letter.arff similarity index 100% rename from fimdlp/testcpp/datasets/letter.arff rename to src/cppmdlp/tests/datasets/letter.arff diff --git a/fimdlp/testcpp/datasets/mfeat-factors.arff b/src/cppmdlp/tests/datasets/mfeat-factors.arff similarity index 100% rename from fimdlp/testcpp/datasets/mfeat-factors.arff rename to src/cppmdlp/tests/datasets/mfeat-factors.arff diff --git a/fimdlp/testcpp/test b/src/cppmdlp/tests/test similarity index 100% rename from fimdlp/testcpp/test rename to src/cppmdlp/tests/test diff --git a/fimdlp/typesFImdlp.h b/src/cppmdlp/typesFImdlp.h similarity index 100% rename from fimdlp/typesFImdlp.h rename to src/cppmdlp/typesFImdlp.h diff --git a/fimdlp/__init__.py b/src/fimdlp/__init__.py similarity index 62% rename from fimdlp/__init__.py rename to src/fimdlp/__init__.py index d1675e1..3a99d3b 100644 --- a/fimdlp/__init__.py +++ b/src/fimdlp/__init__.py @@ -1,3 +1,8 @@ from ._version import __version__ + +def version(): + return __version__ + + all = ["FImdlp", "__version__"] diff --git a/fimdlp/_version.py b/src/fimdlp/_version.py similarity index 100% rename from fimdlp/_version.py rename to src/fimdlp/_version.py diff --git a/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx similarity index 91% rename from fimdlp/cfimdlp.pyx rename to src/fimdlp/cfimdlp.pyx index 3ffea79..c831389 100644 --- a/fimdlp/cfimdlp.pyx +++ b/src/fimdlp/cfimdlp.pyx @@ -3,7 +3,7 @@ from libcpp.vector cimport vector from libcpp cimport bool -cdef extern from "CPPFImdlp.h" namespace "mdlp": +cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp": ctypedef float precision_t cdef cppclass CPPFImdlp: CPPFImdlp(bool) except + @@ -22,4 +22,4 @@ cdef 
class CFImdlp: return self def get_cut_points(self): return self.thisptr.getCutPoints() - \ No newline at end of file + diff --git a/fimdlp/mdlp.py b/src/fimdlp/mdlp.py similarity index 97% rename from fimdlp/mdlp.py rename to src/fimdlp/mdlp.py index ab82dc6..b8f11e7 100644 --- a/fimdlp/mdlp.py +++ b/src/fimdlp/mdlp.py @@ -16,8 +16,8 @@ class FImdlp(TransformerMixin, BaseEstimator): Parameters ---------- n_jobs : int, default=-1 - The number of jobs to run in parallel. :meth:`fit` and - :meth:`transform`, are parallelized over the features. ``-1`` means + The number of jobs to run in parallel. :meth:`fit` and + :meth:`transform`, are parallelized over the features. ``-1`` means using all cores available. Attributes @@ -28,9 +28,9 @@ class FImdlp(TransformerMixin, BaseEstimator): The list of discretizers, one for each feature. cut_points_ : list The list of cut points for each feature. - X_ : array + X_ : array the samples used to fit, shape (n_samples, n_features) - y_ : array + y_ : array the labels used to fit, shape (n_samples,) features_ : list the list of features to be discretized diff --git a/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py similarity index 88% rename from fimdlp/tests/FImdlp_test.py rename to src/fimdlp/tests/FImdlp_test.py index 9e681e6..7eb282b 100644 --- a/fimdlp/tests/FImdlp_test.py +++ b/src/fimdlp/tests/FImdlp_test.py @@ -3,9 +3,14 @@ import sklearn from sklearn.datasets import load_iris import numpy as np from ..mdlp import FImdlp +from .. 
import version +from .._version import __version__ class FImdlpTest(unittest.TestCase): + def test_version(self): + self.assertEqual(version(), __version__) + def test_init(self): clf = FImdlp() self.assertEqual(-1, clf.n_jobs) @@ -74,6 +79,18 @@ class FImdlpTest(unittest.TestCase): clf.fit([[1, 2], [3, 4]], [1, 2], features=["a", "b", "c"]) with self.assertRaises(ValueError): clf.fit([[1, 2], [3, 4]], [1, 2], unexpected="class_name") + with self.assertRaises(ValueError): + clf.fit([[1, 2], [3, 4]], [1, 2], features="01") + with self.assertRaises(ValueError): + clf.fit([[1, 2], [3, 4]], [1, 2], features=[0, 0]) + with self.assertRaises(ValueError): + clf.fit([[1, 2], [3, 4]], [1, 2], features=[0, 2]) + + def test_fit_features(self): + clf = FImdlp() + clf.fit([[1, 2], [3, 4]], [1, 2], features=[0]) + res = clf.transform([[1, 2], [3, 4]]) + self.assertListEqual(res.tolist(), [[0, 2], [0, 4]]) def test_transform_original(self): clf = FImdlp(proposal=False) diff --git a/fimdlp/tests/__init__.py b/src/fimdlp/tests/__init__.py similarity index 62% rename from fimdlp/tests/__init__.py rename to src/fimdlp/tests/__init__.py index cac328b..8feb203 100644 --- a/fimdlp/tests/__init__.py +++ b/src/fimdlp/tests/__init__.py @@ -1,3 +1 @@ from .FImdlp_test import FImdlpTest - -all = ["FImdlpTest"]