Add mdlp c++ submodule and reconfigure

This commit is contained in:
2022-12-11 00:27:27 +01:00
parent 3d48073574
commit 252c0fef17
28 changed files with 8 additions and 34121 deletions

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "fimdlp/cppmdlp"]
path = fimdlp/cppmdlp
url = https://github.com/rmontanana/mdlp

View File

@@ -11,12 +11,8 @@ clean: ## Clean up
# Runs the Python unit tests (fimdlp.tests) through `coverage run`, then
# changes into fimdlp/testcpp and executes the `./test` runner found there.
test:
coverage run -m unittest -v fimdlp.tests
cd fimdlp/testcpp && ./test
# Coverage workflow: empties fimdlp/testcpp/build/CMakeFiles when that
# directory exists, re-runs the `test` target, executes `./cover` inside
# fimdlp/testcpp and finally prints the Python coverage report.
coverage:
if [ -d fimdlp/testcpp/build/CMakeFiles ]; then rm -fr fimdlp/testcpp/build/CMakeFiles/* ; fi;
make test
cd fimdlp/testcpp && ./cover
coverage report -m
lint: ## Lint and static-check

Binary file not shown.

273
debug.cpp
View File

@@ -1,273 +0,0 @@
// Debug dump (scratch fragment, not inside any function in this file):
// prints every (X, y) pair following the order given by `indices`,
// then the information gain obtained for each entry of `cutIdx`.
// NOTE(review): X, y, indices and cutIdx are declared outside this fragment.
std::cout << "+++++++++++++++++++++++" << std::endl;
for (size_t i = 0; i < y.size(); i++) {
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
std::cout << "Information Gain:" << std::endl;
// Number of classes over the whole index range, reused for every cut point below.
auto nc = Metrics::numClasses(y, indices, 0, indices.size());
for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint) {
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
// Earlier call form, kept commented out:
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
}
# Debug helper: computes candidate cut points for the first feature in pure
# Python, then fits self.discretizer_ on the same feature and prints the cut
# points and discretized values it reports, so both can be compared by eye.
# NOTE(review): the indentation of this fragment was lost, so the nesting of
# the statements below has to be inferred.
# NOTE(review): `X` is neither a parameter nor assigned in this fragment —
# confirm where it is supposed to come from.
def test(self):
print("Calculating cut points in python for first feature")
yz = self.y_.copy()
xz = X[:, 0].copy()
# Order both the feature values and the labels by the first feature.
xz = xz[np.argsort(X[:, 0])]
yz = yz[np.argsort(X[:, 0])]
cuts = []
for i in range(1, len(yz)):
# A candidate exists where the label changes AND the value strictly increases.
if yz[i] != yz[i - 1] and xz[i - 1] < xz[i] :
print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
# The cut is placed at the midpoint of the two neighbouring values.
cuts.append((xz[i] + xz[i - 1]) / 2)
print("Cuts calculados en python: ", cuts)
print("-- Cuts calculados en C++ --")
print("Cut points for each feature in Iris dataset:")
# range(0, 1) restricts the comparison to feature 0 only.
for i in range(0, 1):
# datax = self.X_[np.argsort(self.X_[:, i]), i]
# y_ = self.y_[np.argsort(self.X_[:, i])]
datax = self.X_[:, i]
y_ = self.y_
self.discretizer_.fit(datax, y_)
Xcutpoints = self.discretizer_.get_cut_points()
# The comprehension below reuses the name `i` for each cut-point record.
print(
f"New ({len(Xcutpoints)}):{self.features_[i]:20s}: "
f"{[i['toValue'] for i in Xcutpoints]}"
)
# One human-readable line per cut-point record, built from its
# classNumber / start / end / fromValue / toValue entries.
X_translated = [
f"{i['classNumber']} - ({i['start']}, {i['end']}) - "
f"({i['fromValue']}, {i['toValue']})"
for i in Xcutpoints
]
print(X_translated)
print("*******************************")
print("Disretized values:")
print(self.discretizer_.get_discretized_values())
print("*******************************")
return X
c++
i: 0 4.3, 0
i : 1 4.4, 0
i : 2 4.4, 0
i : 3 4.4, 0
i : 4 4.5, 0
i : 5 4.6, 0
i : 6 4.6, 0
i : 7 4.6, 0
i : 8 4.6, 0
i : 9 4.7, 0
i : 10 4.7, 0
i : 11 4.8, 0
i : 12 4.8, 0
i : 13 4.8, 0
i : 14 4.8, 0
i : 15 4.8, 0
i : 16 4.9, 0
i : 17 4.9, 0
i : 18 4.9, 0
i : 19 4.9, 0
i : 20 4.9, 1
python
i : 0 4.3 0
i : 1 4.4 0
i : 2 4.4 0
i : 3 4.4 0
i : 4 4.5 0
i : 5 4.6 0
i : 6 4.6 0
i : 7 4.6 0
i : 8 4.6 0
i : 9 4.7 0
i : 10 4.7 0
i : 11 4.8 0
i : 12 4.8 0
i : 13 4.8 0
i : 14 4.8 0
i : 15 4.8 0
i : 16 4.9 1
i : 17 4.9 2
i : 18 4.9 0
i : 19 4.9 0
i : 20 4.9 0
idx: 20 entropy_left : 0 entropy_right : 0.488187 -> 0 150
idx : 21 entropy_left : 0.0670374 entropy_right : 0.489381 -> 0 150
idx : 22 entropy_left : 0.125003 entropy_right : 0.490573 -> 0 150
idx : 24 entropy_left : 0.11507 entropy_right : 0.482206 -> 0 150
idx : 25 entropy_left : 0.162294 entropy_right : 0.483488 -> 0 150
idx : 29 entropy_left : 0.141244 entropy_right : 0.462922 -> 0 150
idx : 30 entropy_left : 0.178924 entropy_right : 0.464386 -> 0 150
idx : 33 entropy_left : 0.163818 entropy_right : 0.444778 -> 0 150
idx : 34 entropy_left : 0.195735 entropy_right : 0.44637 -> 0 150
idx : 44 entropy_left : 0.154253 entropy_right : 0.339183 -> 0 150
idx : 45 entropy_left : 0.178924 entropy_right : 0.34098 -> 0 150
idx : 51 entropy_left : 0.159328 entropy_right : 0.217547 -> 0 150
idx : 52 entropy_left : 0.180508 entropy_right : 0.219019 -> 0 150
idx : 53 entropy_left : 0.177368 entropy_right : 0.189687 -> 0 150
idx : 58 entropy_left : 0.265229 entropy_right : 0.196677 -> 0 150
idx : 59 entropy_left : 0.261331 entropy_right : 0.162291 -> 0 150
idx : 61 entropy_left : 0.289819 entropy_right : 0.164857 -> 0 150
idx : 62 entropy_left : 0.302928 entropy_right : 0.166175 -> 0 150
idx : 68 entropy_left : 0.36831 entropy_right : 0.174607 -> 0 150
idx : 69 entropy_left : 0.364217 entropy_right : 0.131848 -> 0 150
idx : 70 entropy_left : 0.373248 entropy_right : 0.133048 -> 0 150
idx : 71 entropy_left : 0.381826 entropy_right : 0.134273 -> 0 150
idx : 72 entropy_left : 0.377855 entropy_right : 0.0805821 -> 0 150
idx : 74 entropy_left : 0.393817 entropy_right : 0.0822096 -> 0 150
idx : 75 entropy_left : 0.401218 entropy_right : 0.0830509 -> 0 150
idx : 76 entropy_left : 0.397415 entropy_right : 0 -> 0 150
idx : 77 entropy_left : 0.4045 entropy_right : 0 -> 0 150
idx : 78 entropy_left : 0.411247 entropy_right : 0 -> 0 150
idx : 79 entropy_left : 0.417674 entropy_right : 0 -> 0 150
idx : 81 entropy_left : 0.429626 entropy_right : 0 -> 0 150
idx : 83 entropy_left : 0.440472 entropy_right : 0 -> 0 150
idx : 84 entropy_left : 0.445513 entropy_right : 0 -> 0 150
idx : 87 entropy_left : 0.459246 entropy_right : 0 -> 0 150
idx : 88 entropy_left : 0.463395 entropy_right : 0 -> 0 150
idx : 89 entropy_left : 0.467347 entropy_right : 0 -> 0 150
idx : 91 entropy_left : 0.474691 entropy_right : 0 -> 0 150
idx : 95 entropy_left : 0.487368 entropy_right : 0 -> 0 150
idx : 97 entropy_left : 0.492813 entropy_right : 0 -> 0 150
idx : 99 entropy_left : 0.497728 entropy_right : 0 -> 0 150
idx : 101 entropy_left : 0.502156 entropy_right : 0 -> 0 150
idx : 102 entropy_left : 0.504201 entropy_right : 0 -> 0 150
idx : 104 entropy_left : 0.507973 entropy_right : 0 -> 0 150
idx : 105 entropy_left : 0.509709 entropy_right : 0 -> 0 150
idx : 106 entropy_left : 0.511351 entropy_right : 0 -> 0 150
idx : 107 entropy_left : 0.512902 entropy_right : 0 -> 0 150
idx : 109 entropy_left : 0.515747 entropy_right : 0 -> 0 150
idx : 110 entropy_left : 0.517047 entropy_right : 0 -> 0 150
idx : 113 entropy_left : 0.520497 entropy_right : 0 -> 0 150
idx : 114 entropy_left : 0.521506 entropy_right : 0 -> 0 150
idx : 117 entropy_left : 0.524149 entropy_right : 0 -> 0 150
idx : 118 entropy_left : 0.52491 entropy_right : 0 -> 0 150
idx : 120 entropy_left : 0.526264 entropy_right : 0 -> 0 150
idx : 122 entropy_left : 0.52741 entropy_right : 0 -> 0 150
idx : 127 entropy_left : 0.52946 entropy_right : 0 -> 0 150
idx : 130 entropy_left : 0.530197 entropy_right : 0 -> 0 150
idx : 132 entropy_left : 0.530507 entropy_right : 0 -> 0 150
idx : 133 entropy_left : 0.530611 entropy_right : 0 -> 0 150
idx : 134 entropy_left : 0.530684 entropy_right : 0 -> 0 150
idx : 135 entropy_left : 0.530726 entropy_right : 0 -> 0 150
idx : 137 entropy_left : 0.530721 entropy_right : 0 -> 0 150
idx : 138 entropy_left : 0.530677 entropy_right : 0 -> 0 150
cut : 5.5 index : 53
start : 0 cut : 53 end : 150
k = 3 k1 = 3 k2 = 3 ent = 0.528321 ent1 = 0.177368 ent2 = 0.189687
ig = 0.342987 delta = 4.16006 N 150 term 0.0758615
¡Ding!5.5 53
idx : 20 entropy_left : 0 entropy_right : 1.5485806065228545 -> 0 150
idx : 21 entropy_left : 0.2761954276479391 entropy_right : 1.549829505666378 -> 0 150
idx : 22 entropy_left : 0.5304060778306042 entropy_right : 1.5511852922535474 -> 0 150
idx : 24 entropy_left : 0.4971501836369671 entropy_right : 1.5419822842863982 -> 0 150
idx : 25 entropy_left : 0.6395563653739031 entropy_right : 1.5433449229510985 -> 0 150
idx : 29 entropy_left : 0.574828144380386 entropy_right : 1.5202013991459298 -> 0 150
idx : 30 entropy_left : 0.6746799231474564 entropy_right : 1.521677608876836 -> 0 150
idx : 33 entropy_left : 0.6311718053929063 entropy_right : 1.4992098113026513 -> 0 150
idx : 34 entropy_left : 0.7085966983474103 entropy_right : 1.5007111828980744 -> 0 150
idx : 44 entropy_left : 0.5928251064639408 entropy_right : 1.3764263022492553 -> 0 150
idx : 45 entropy_left : 0.6531791627726858 entropy_right : 1.3779796176519241 -> 0 150
idx : 51 entropy_left : 0.5990326006132177 entropy_right : 1.2367928607774141 -> 0 150
idx : 52 entropy_left : 0.6496096346956632 entropy_right : 1.2377158231343603 -> 0 150
idx : 53 entropy_left : 0.6412482850735854 entropy_right : 1.2046986815511866 -> 0 150
idx : 58 entropy_left : 0.8211258609270055 entropy_right : 1.2056112071736118 -> 0 150
idx : 59 entropy_left : 0.8128223064150747 entropy_right : 1.167065448996099 -> 0 150
idx : 61 entropy_left : 0.8623538561746379 entropy_right : 1.1653351793699953 -> 0 150
idx : 62 entropy_left : 0.9353028851500502 entropy_right : 1.1687172769890006 -> 0 150
idx : 68 entropy_left : 1.031929035599206 entropy_right : 1.1573913563403753 -> 0 150
idx : 69 entropy_left : 1.0246284743137688 entropy_right : 1.109500797247481 -> 0 150
idx : 70 entropy_left : 1.036186417911213 entropy_right : 1.105866621101474 -> 0 150
idx : 71 entropy_left : 1.0895830429620594 entropy_right : 1.1104593064416028 -> 0 150
idx : 72 entropy_left : 1.0822273380873693 entropy_right : 1.0511407586429597 -> 0 150
idx : 74 entropy_left : 1.1015727511177442 entropy_right : 1.041722068095403 -> 0 150
idx : 75 entropy_left : 1.1457749842070042 entropy_right : 1.0462881865460743 -> 0 150
idx : 76 entropy_left : 1.1387129726704701 entropy_right : 0.9568886656798212 -> 0 150
idx : 77 entropy_left : 1.1468549240968817 entropy_right : 0.9505668528932196 -> 0 150
idx : 78 entropy_left : 1.1848333092150132 entropy_right : 0.9544340029249649 -> 0 150
idx : 79 entropy_left : 1.1918623939938016 entropy_right : 0.9477073729342066 -> 0 150
idx : 81 entropy_left : 1.2548698305334247 entropy_right : 0.9557589912150009 -> 0 150
idx : 83 entropy_left : 1.2659342914094807 entropy_right : 0.9411864371816835 -> 0 150
idx : 84 entropy_left : 1.2922669208691815 entropy_right : 0.9456603046006402 -> 0 150
idx : 87 entropy_left : 1.3041589171425696 entropy_right : 0.9182958340544896 -> 0 150
idx : 88 entropy_left : 1.327572716814381 entropy_right : 0.9235785996175947 -> 0 150
idx : 89 entropy_left : 1.330465426809402 entropy_right : 0.9127341558073343 -> 0 150
idx : 91 entropy_left : 1.3709454625942779 entropy_right : 0.9238422284571814 -> 0 150
idx : 95 entropy_left : 1.378063041001916 entropy_right : 0.8698926856041563 -> 0 150
idx : 97 entropy_left : 1.4115390027326744 entropy_right : 0.8835850861052532 -> 0 150
idx : 99 entropy_left : 1.4130351465796736 entropy_right : 0.8478617451660526 -> 0 150
idx : 101 entropy_left : 1.4412464483479606 entropy_right : 0.863120568566631 -> 0 150
idx : 102 entropy_left : 1.4415827640191903 entropy_right : 0.8426578772022391 -> 0 150
idx : 104 entropy_left : 1.4655411381577925 entropy_right : 0.8589810370425963 -> 0 150
idx : 105 entropy_left : 1.465665295753282 entropy_right : 0.8366407419411673 -> 0 150
idx : 106 entropy_left : 1.4762911618692924 entropy_right : 0.8453509366224365 -> 0 150
idx : 107 entropy_left : 1.4762132849962355 entropy_right : 0.8203636429576732 -> 0 150
idx : 109 entropy_left : 1.4951379218217782 entropy_right : 0.8390040613676977 -> 0 150
idx : 110 entropy_left : 1.4949188482339508 entropy_right : 0.8112781244591328 -> 0 150
idx : 113 entropy_left : 1.5183041104369397 entropy_right : 0.8418521897563207 -> 0 150
idx : 114 entropy_left : 1.51802714866133 entropy_right : 0.8112781244591328 -> 0 150
idx : 117 entropy_left : 1.5364854516368571 entropy_right : 0.8453509366224365 -> 0 150
idx : 118 entropy_left : 1.5361890331151247 entropy_right : 0.8112781244591328 -> 0 150
idx : 120 entropy_left : 1.5462566034163763 entropy_right : 0.8366407419411673 -> 0 150
idx : 122 entropy_left : 1.545378825051491 entropy_right : 0.74959525725948 -> 0 150
idx : 127 entropy_left : 1.5644893588382582 entropy_right : 0.828055725379504 -> 0 150
idx : 130 entropy_left : 1.562956340286807 entropy_right : 0.6098403047164004 -> 0 150
idx : 132 entropy_left : 1.5687623685201277 entropy_right : 0.6500224216483541 -> 0 150
idx : 133 entropy_left : 1.5680951037987416 entropy_right : 0.5225593745369408 -> 0 150
idx : 134 entropy_left : 1.5706540443736308 entropy_right : 0.5435644431995964 -> 0 150
idx : 135 entropy_left : 1.5699201014782036 entropy_right : 0.35335933502142136 -> 0 150
idx : 137 entropy_left : 1.5744201314186457 entropy_right : 0.39124356362925566 -> 0 150
idx : 138 entropy_left : 1.5736921054134685 entropy_right : 0 -> 0 150
¡Ding!4.9 20
k = 2 k1 = 1 k2 = 2 ent = 0.5225593745369408 ent1 = 0 ent2 = 0.5435644431995964
ig = 0.010969310349085326 delta = 2.849365059382915 N 17 term 0.4029038270225244
idx : 135 entropy_left : 0 entropy_right : 0.35335933502142136 -> 134 150
idx : 137 entropy_left : 0.9182958340544896 entropy_right : 0.39124356362925566 -> 134 150
idx : 138 entropy_left : 1.0 entropy_right : 0 -> 134 150
start : 134 cut : 135 end : 150
k = 2 k1 = 1 k2 = 2 ent = 0.5435644431995964 ent1 = 0 ent2 = 0.35335933502142136
ig = 0.21229006661701388 delta = 2.426944705701254 N 16 term 0.39586470633186077
idx : 137 entropy_left : 0 entropy_right : 0.39124356362925566 -> 135 150
idx : 138 entropy_left : 0.9182958340544896 entropy_right : 0 -> 135 150
start : 135 cut : 137 end : 150
k = 2 k1 = 1 k2 = 2 ent = 0.35335933502142136 ent1 = 0 ent2 = 0.39124356362925566
ig = 0.01428157987606643 delta = 2.8831233792732727 N 15 term 0.44603188675539174
idx : 138 entropy_left : 0 entropy_right : 0 -> 137 150
start : 137 cut : 138 end : 150
k = 2 k1 = 1 k2 = 1 ent = 0.39124356362925566 ent1 = 0 ent2 = 0
ig = 0.39124356362925566 delta = 2.0248677947990927 N 13 term 0.4315254073477115
[[4.9, 5.2, 5.4, 6.75]]
cut : 1.4 index : 81
start : 50 cut : 81 end : 96
k = 2 k1 = 2 k2 = 1 ent = 0.151097 ent1 = 0.205593 ent2 = 0
ig = 0.0125455 delta = 2.91635 N 46 term 0.182787
idx : 80 entropy_left : 0 entropy_right : 0 -> 50 81
cut : 1.4 index : 80
start : 50 cut : 80 end : 81
k = 2 k1 = 1 k2 = 1 ent = 0.205593 ent1 = 0 ent2 = 0
ig = 0.205593 delta = 2.39617 N 31 term 0.235583
idx : 112 entropy_left : 0 entropy_right : 0.175565 -> 103 150
idx : 113 entropy_left : 0.468996 entropy_right : 0 -> 103 150
cut : 1.8 index : 112
start : 103 cut : 112 end : 150
k = 2 k1 = 1 k2 = 2 ent = 0.148549 ent1 = 0 ent2 = 0.175565
ig = 0.00660326 delta = 2.86139 N 47 term 0.178403
idx : 113 entropy_left : 0 entropy_right : 0 -> 112 150
cut : 1.8 index : 113
start : 112 cut : 113 end : 150
k = 2 k1 = 1 k2 = 1 ent = 0.175565 ent1 = 0 ent2 = 0
ig = 0.175565 delta = 2.45622 N 38 term 0.201728
[[4.900000095367432, 4.949999809265137, 5.0, 5.099999904632568, 5.199999809265137, 5.25, 5.400000095367432, 5.449999809265137,
5.5, 5.550000190734863, 5.599999904632568, 5.699999809265137, 5.800000190734863, 5.900000095367432, 5.949999809265137, 6.0, 6.050000190734863,
6.099999904632568, 6.149999618530273, 6.199999809265137, 6.25, 6.300000190734863, 6.400000095367432, 6.5, 6.550000190734863, 6.649999618530273, 6.699999809265137,
6.75, 6.800000190734863, 6.850000381469727, 6.900000095367432, 6.949999809265137, 7.050000190734863]]

View File

@@ -1,183 +0,0 @@
# Project definition for the fimdlp C++ sources; builds with the C++14 standard.
cmake_minimum_required(VERSION 3.24)
project(fimdlp)
set(CMAKE_CXX_STANDARD 14)
# Header search path: the project directory itself ...
include_directories(.)
# ... plus the googletest/googlemock trees located under testcpp/build/_deps.
# NOTE(review): these paths live inside a build directory, so they presumably
# exist only after the testcpp build has been configured — confirm.
include_directories(testcpp/build/_deps/googletest-src/googlemock/include)
include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock)
include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal)
include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom)
include_directories(testcpp/build/_deps/googletest-src/googlemock/test)
include_directories(testcpp/build/_deps/googletest-src/googletest/include)
include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest)
include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal)
include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom)
include_directories(testcpp/build/_deps/googletest-src/googletest/samples)
include_directories(testcpp/build/_deps/googletest-src/googletest/src)
include_directories(testcpp/build/_deps/googletest-src/googletest/test)
add_executable(fimdlp
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-matchers.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-port.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-internal-utils.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-port.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-pp.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-actions.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-cardinalities.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-function-mocker.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-matchers.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-actions.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-matchers.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-nice-strict.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-spec-builders.h
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock.h
testcpp/build/_deps/googletest-src/googlemock/src/gmock-all.cc
testcpp/build/_deps/googletest-src/googlemock/src/gmock-cardinalities.cc
testcpp/build/_deps/googletest-src/googlemock/src/gmock-internal-utils.cc
testcpp/build/_deps/googletest-src/googlemock/src/gmock-matchers.cc
testcpp/build/_deps/googletest-src/googlemock/src/gmock-spec-builders.cc
testcpp/build/_deps/googletest-src/googlemock/src/gmock.cc
testcpp/build/_deps/googletest-src/googlemock/src/gmock_main.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-actions_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-cardinalities_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-function-mocker_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-internal-utils_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-arithmetic_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-comparisons_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-containers_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-misc_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers_test.h
testcpp/build/_deps/googletest-src/googlemock/test/gmock-more-actions_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-nice-strict_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-port_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp-string_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock-spec-builders_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_all_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_ex_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_leak_test_.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_link2_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.h
testcpp/build/_deps/googletest-src/googlemock/test/gmock_output_test_.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_stress_test.cc
testcpp/build/_deps/googletest-src/googlemock/test/gmock_test.cc
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-port.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-printers.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-death-test-internal.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-filepath.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-internal.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-param-util.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port-arch.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-string.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-type-util.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-assertion-result.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-death-test.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-matchers.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-message.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-param-test.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-printers.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-spi.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-test-part.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-typed-test.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_pred_impl.h
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_prod.h
testcpp/build/_deps/googletest-src/googletest/samples/prime_tables.h
testcpp/build/_deps/googletest-src/googletest/samples/sample1.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample1.h
testcpp/build/_deps/googletest-src/googletest/samples/sample10_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample1_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample2.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample2.h
testcpp/build/_deps/googletest-src/googletest/samples/sample2_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample3-inl.h
testcpp/build/_deps/googletest-src/googletest/samples/sample3_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample4.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample4.h
testcpp/build/_deps/googletest-src/googletest/samples/sample4_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample5_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample6_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample7_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample8_unittest.cc
testcpp/build/_deps/googletest-src/googletest/samples/sample9_unittest.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-all.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-assertion-result.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-death-test.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-filepath.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-internal-inl.h
testcpp/build/_deps/googletest-src/googletest/src/gtest-matchers.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-port.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-printers.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-test-part.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest-typed-test.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest.cc
testcpp/build/_deps/googletest-src/googletest/src/gtest_main.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-break-on-failure-unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-catch-exceptions-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-color-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test_ex_test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-env-var-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-failfast-unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-filepath-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-filter-unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-global-environment-unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-list-tests-unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-listener-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-message-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-options-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-output-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name1-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name2-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.h
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test2-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-port-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-printers-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-setuptestsuite-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-shuffle-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-test-part-test.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-throw-on-failure-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/googletest-uninitialized-test_.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test2_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.h
testcpp/build/_deps/googletest-src/googletest/test/gtest-unittest-api_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_all_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_assert_by_exception_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_environment_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_help_test_.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_list_output_unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_main_unittest.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_no_test_unittest.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_pred_impl_unittest.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_premature_exit_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_prod_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_repeat_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_in_environment_setup_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_sole_header_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_stress_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_test_macro_stack_footprint_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_testbridge_test_.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_throw_on_failure_ex_test.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_unittest.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile1_test_.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile2_test_.cc
testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_output_unittest_.cc
testcpp/build/_deps/googletest-src/googletest/test/production.cc
testcpp/build/_deps/googletest-src/googletest/test/production.h
testcpp/CMakeLists.txt
testcpp/FImdlp_unittest.cc
testcpp/Metrics_unittest.cc
cfimdlp.cpp
CPPFImdlp.cpp
CPPFImdlp.h
Metrics.cpp
Metrics.h
typesFImdlp.h)

View File

@@ -1,159 +0,0 @@
#include <numeric>
#include <algorithm>
#include <set>
#include "CPPFImdlp.h"
#include "Metrics.h"
namespace mdlp {
// Creates an unfitted discretizer. `proposal` selects which cut-point
// algorithm fit() runs; the index and label containers start empty and
// `metrics` is constructed on top of them.
CPPFImdlp::CPPFImdlp(bool proposal)
    : proposal(proposal),
      indices(),
      y(),
      metrics(y, indices)
{
}

// Nothing to release explicitly: every member cleans up after itself.
CPPFImdlp::~CPPFImdlp() = default;
// Computes the cut points for one feature.
//
// X_ : sample values of the feature.
// y_ : class label of each sample; must have the same length as X_.
// Returns *this so calls can be chained.
// Throws invalid_argument when the sizes differ or the input is empty.
//
// The arguments are validated BEFORE any member is overwritten, so a rejected
// call leaves the results of a previous successful fit() untouched.
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
{
    if (X_.size() != y_.size()) {
        throw invalid_argument("X and y must have the same size");
    }
    if (X_.size() == 0 || y_.size() == 0) {
        throw invalid_argument("X and y must have at least one element");
    }
    X = X_;
    y = y_;
    cutPoints.clear();
    // Positions of the samples in ascending order of X; every later step
    // addresses X and y through this permutation.
    indices = sortIndices(X_);
    metrics.setData(y, indices);
    if (proposal) {
        computeCutPointsProposal();
    } else {
        computeCutPoints(0, X.size());
    }
    return *this;
}
// Recursively splits the sorted interval [start, end) and records cut points.
//
// start, end : positions in `indices` delimiting the interval (end exclusive).
//
// The best candidate of the interval is requested from getCandidate(). When
// there is none, or mdlp() rejects it, the interval is final and its two
// borders are stored as cut points; otherwise both halves are processed.
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
{
    if (end - start < 2)
        return;
    // Keep the full width of getCandidate()'s return type: storing it in an
    // `int` would narrow the index.
    const long int cut = getCandidate(start, end);
    if (cut == -1 || !mdlp(start, static_cast<size_t>(cut), end)) {
        // cut == -1 means that there is no candidate in the interval.
        // No boundary found, so we add both ends of the interval as cut points
        // because they were selected by the algorithm before.
        if (start != 0)
            cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
        if (end != X.size())
            cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
        return;
    }
    const auto cutPos = static_cast<size_t>(cut);
    computeCutPoints(start, cutPos);
    computeCutPoints(cutPos, end);
}
void CPPFImdlp::computeCutPointsOriginal(size_t start, size_t end)
{
precision_t cut;
if (end - start < 2)
return;
cut = getCandidate(start, end);
if (cut == -1)
return;
if (mdlp(start, cut, end)) {
cutPoints.push_back((X[indices[cut]] + X[indices[cut - 1]]) / 2);
}
computeCutPointsOriginal(start, cut);
computeCutPointsOriginal(cut, end);
}
// Alternative cut-point search: walks the samples once in sorted order,
// treating every run of equal X values as a group, and asks mdlp() whether a
// cut should be placed where a group ends. Accepted cuts are appended to
// cutPoints.
// Reads X[indices[0]] unconditionally; fit() rejects empty input before
// calling this method.
void CPPFImdlp::computeCutPointsProposal()
{
precision_t xPrev, xCur, xPivot, cutPoint;
int yPrev, yCur, yPivot;
size_t idx, numElements, start;
xCur = xPrev = X[indices[0]];
yCur = yPrev = y[indices[0]];
// numElements is the last valid position in `indices`, not the element count.
numElements = indices.size() - 1;
idx = start = 0;
while (idx < numElements) {
// The pivot is the value/label pair that opens the current group.
xPivot = xCur;
yPivot = yCur;
// Read the same values and check class changes
do {
idx++;
xCur = X[indices[idx]];
yCur = y[indices[idx]];
// -1 marks a group whose equal X values carry more than one label.
// NOTE(review): presumably real labels are never -1 — confirm.
if (yCur != yPivot && xCur == xPivot) {
yPivot = -1;
}
}
while (idx < numElements && xCur == xPivot);
// Check if the class changed and there are more than 1 element
// The cut is tested against the range [start, indices.size()), i.e. from the
// last accepted cut up to the end of the data.
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && mdlp(start, idx, indices.size())) {
start = idx;
// NOTE(review): xPrev is only refreshed at the end of each iteration, so from
// the second iteration on it holds the previous group's value rather than the
// value immediately left of xCur — confirm this midpoint is the intended one.
cutPoint = (xPrev + xCur) / 2;
cutPoints.push_back(cutPoint);
}
yPrev = yPivot;
xPrev = xPivot;
}
}
long int CPPFImdlp::getCandidate(size_t start, size_t end)
{
long int candidate = -1, elements = end - start;
precision_t entropy_left, entropy_right, minEntropy = numeric_limits<precision_t>::max();
for (auto idx = start + 1; idx < end; idx++) {
// Cutpoints are always on boudndaries
if (y[indices[idx]] == y[indices[idx - 1]])
continue;
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
if (entropy_left + entropy_right < minEntropy) {
minEntropy = entropy_left + entropy_right;
candidate = idx;
}
}
return candidate;
}
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
{
int k, k1, k2;
precision_t ig, delta;
precision_t ent, ent1, ent2;
auto N = precision_t(end - start);
if (N < 2) {
return false;
}
k = metrics.computeNumClasses(start, end);
k1 = metrics.computeNumClasses(start, cut);
k2 = metrics.computeNumClasses(cut, end);
ent = metrics.entropy(start, end);
ent1 = metrics.entropy(start, cut);
ent2 = metrics.entropy(cut, end);
ig = metrics.informationGain(start, cut, end);
delta = log2(pow(3, precision_t(k)) - 2) -
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
precision_t term = 1 / N * (log2(N - 1) + delta);
return ig > term;
}
// Returns the cut points found by the last fit(), without duplicates and in
// ascending order.
//
// A std::set already stores its elements unique and ordered, so the result is
// built straight from it: no throw-away pre-sized vector, no second sort, and
// no narrowing of the element count to `unsigned`.
cutPoints_t CPPFImdlp::getCutPoints()
{
    const std::set<precision_t> uniqueCuts(cutPoints.begin(), cutPoints.end());
    return cutPoints_t(uniqueCuts.begin(), uniqueCuts.end());
}
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
// Returns the positions of X_ ordered by ascending sample value, i.e.
// X_[result[0]] <= X_[result[1]] <= ...
// Samples with equal values keep their original relative order.
indices_t CPPFImdlp::sortIndices(samples_t& X_)
{
    indices_t idx(X_.size());
    iota(idx.begin(), idx.end(), 0);
    // One pass is enough: the previous code repeated the whole sort once per
    // sample. stable_sort also makes the order of ties deterministic.
    stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
        { return X_[i1] < X_[i2]; });
    return idx;
}
}

View File

@@ -1,33 +0,0 @@
#ifndef CPPFIMDLP_H
#define CPPFIMDLP_H
#include "typesFImdlp.h"
#include "Metrics.h"
#include <utility>
namespace mdlp {
// Discretizer for one continuous feature: fit() receives the samples and their
// labels, getCutPoints() returns the resulting cut points.
class CPPFImdlp {
protected:
// Algorithm selector. Judging by the method names below it presumably switches
// between the "original" and the "proposal" variants - TODO confirm in CPPFImdlp.cpp
bool proposal;
indices_t indices; // sorted indices to use with X and y
// Feature values and class labels (presumably stored by fit() - TODO confirm)
samples_t X;
labels_t y;
// Entropy / information gain helper
Metrics metrics;
// Cut points collected by the computeCutPoints* methods, exposed by getCutPoints()
cutPoints_t cutPoints;
// Argsort: positions of the samples ordered by ascending value
static indices_t sortIndices(samples_t&);
void computeCutPoints(size_t, size_t);
// Position of the best candidate cut inside [start, end), or -1 if there is none
long int getCandidate(size_t, size_t);
// Acceptance test for splitting [start, end) at cut: (start, cut, end)
bool mdlp(size_t, size_t, size_t);
// Original algorithm
void computeCutPointsOriginal(size_t, size_t);
bool goodCut(size_t, size_t, size_t);
void computeCutPointsProposal();
public:
CPPFImdlp(bool);
~CPPFImdlp();
CPPFImdlp& fit(samples_t&, labels_t&);
// Cut points in ascending order without duplicates
samples_t getCutPoints();
};
}
#endif

View File

@@ -1,65 +0,0 @@
#include "Metrics.h"
#include <set>
#include <cmath>
using namespace std;
namespace mdlp {
// Binds the metrics to the given label and index vectors, counts the classes
// found through the whole index vector and starts with empty caches.
Metrics::Metrics(labels_t& y_, indices_t& indices_)
    : y(y_),
    indices(indices_),
    numClasses(computeNumClasses(0, indices.size())),
    entropyCache(),
    igCache()
{
}
int Metrics::computeNumClasses(size_t start, size_t end)
{
set<int> nClasses;
for (auto i = start; i < end; ++i) {
nClasses.insert(y[indices[i]]);
}
return nClasses.size();
}
// Replaces the data the metrics work on, recomputes the number of classes
// and drops every cached entropy / information gain value.
void Metrics::setData(labels_t& y_, indices_t& indices_)
{
    // Copy the new contents into the label and index vectors
    y = y_;
    indices = indices_;
    // Cached values belong to the previous data
    igCache.clear();
    entropyCache.clear();
    numClasses = computeNumClasses(0, indices.size());
}
// Entropy (base 2) of the labels found at the index positions [start, end).
// Returns 0 for ranges with fewer than two elements. Results are cached per
// (start, end) pair until setData() is called.
precision_t Metrics::entropy(size_t start, size_t end)
{
    if (end <= start || end - start < 2)
        return 0;
    const cacheEnt_t::key_type key(start, end);
    const auto cached = entropyCache.find(key);
    if (cached != entropyCache.end()) {
        return cached->second;
    }
    // Count the labels in a map so that any label value is valid: indexing a
    // vector of numClasses + 1 counters went out of bounds for labels outside
    // [0, numClasses]. The map is walked in ascending label order, the same
    // order the counters were summed before.
    map<int, int> counts;
    for (auto i = start; i < end; ++i) {
        counts[y[indices[i]]]++;
    }
    const auto nElements = precision_t(end - start);
    precision_t ventropy = 0;
    for (const auto& item : counts) {
        const precision_t p = (precision_t)item.second / nElements;
        ventropy -= p * log2(p);
    }
    entropyCache[key] = ventropy;
    return ventropy;
}
// Information gain obtained by splitting the interval [start, end) at `cut`:
// entropy of the whole interval minus the size-weighted mean of the entropies
// of [start, cut) and [cut, end). Results are cached per (start, cut, end).
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
{
    auto cached = igCache.find(make_tuple(start, cut, end));
    if (cached != igCache.end()) {
        return cached->second;
    }
    int sizeLeft = cut - start;
    int sizeRight = end - cut;
    int sizeAll = end - start;
    precision_t entAll = entropy(start, end);
    precision_t entLeft = entropy(start, cut);
    precision_t entRight = entropy(cut, end);
    precision_t gain = entAll - ((precision_t)sizeLeft * entLeft + (precision_t)sizeRight * entRight) / sizeAll;
    igCache[make_tuple(start, cut, end)] = gain;
    return gain;
}
}

View File

@@ -1,20 +0,0 @@
#ifndef CCMETRICS_H
#define CCMETRICS_H
#include "typesFImdlp.h"
namespace mdlp {
// Entropy and information gain over ranges of a label vector that is accessed
// through an index vector, with per-range caching of the results.
class Metrics {
protected:
// References bound by the constructor; the vectors themselves are owned elsewhere
labels_t& y;
indices_t& indices;
// Number of distinct labels found through the whole index vector
int numClasses;
// Results already computed, keyed by (start, end) and (start, cut, end)
cacheEnt_t entropyCache;
cacheIg_t igCache;
public:
Metrics(labels_t&, indices_t&);
// Replaces the data, recomputes numClasses and clears both caches
void setData(labels_t&, indices_t&);
// Distinct labels in the index range [start, end)
int computeNumClasses(size_t, size_t);
// Entropy of the labels in the index range [start, end)
precision_t entropy(size_t, size_t);
// Information gain of splitting [start, end) at cut: (start, cut, end)
precision_t informationGain(size_t, size_t, size_t);
};
}
#endif

View File

@@ -3,7 +3,7 @@
from libcpp.vector cimport vector
from libcpp cimport bool
cdef extern from "CPPFImdlp.h" namespace "mdlp":
cdef extern from "cppmdlp/CPPFImdlp.h" namespace "mdlp":
ctypedef float precision_t
cdef cppclass CPPFImdlp:
CPPFImdlp(bool) except +
@@ -22,4 +22,4 @@ cdef class CFImdlp:
return self
def get_cut_points(self):
return self.thisptr.getCutPoints()

1
fimdlp/cppmdlp Submodule

Submodule fimdlp/cppmdlp added at 7d940171b5

View File

@@ -1,2 +0,0 @@
build
build/*

View File

@@ -1,33 +0,0 @@
# Build script for the C++ unit tests: fetches GoogleTest, builds one test
# executable per test file with coverage instrumentation and registers the
# tests with CTest.
cmake_minimum_required(VERSION 3.14)
project(FImdlp)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 14)
include(FetchContent)
# NOTE(review): GTEST_INCLUDE_DIRS is not set anywhere in this script; it looks
# like a leftover from a find_package(GTest) based setup - confirm before removing
include_directories(${GTEST_INCLUDE_DIRS})
# GoogleTest is downloaded at configure time, pinned to a fixed commit archive
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
enable_testing()
# The library sources live one directory up and are compiled into each test binary
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cc)
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../Metrics.cpp FImdlp_unittest.cc)
target_link_libraries(Metrics_unittest GTest::gtest_main)
target_link_libraries(FImdlp_unittest GTest::gtest_main)
# --coverage is needed both when compiling and when linking
target_compile_options(Metrics_unittest PRIVATE --coverage)
target_compile_options(FImdlp_unittest PRIVATE --coverage)
target_link_options(Metrics_unittest PRIVATE --coverage)
target_link_options(FImdlp_unittest PRIVATE --coverage)
# -fprofile-arcs -ftest-coverage
include(GoogleTest)
gtest_discover_tests(Metrics_unittest)
gtest_discover_tests(FImdlp_unittest)

View File

@@ -1,166 +0,0 @@
#include "gtest/gtest.h"
#include "../Metrics.h"
#include "../CPPFImdlp.h"
namespace mdlp {
class TestFImdlp: public CPPFImdlp, public testing::Test {
public:
TestFImdlp(): CPPFImdlp(false) {}
void SetUp()
{
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
fit(X, y);
}
void setProposal(bool value)
{
proposal = value;
}
void initIndices()
{
indices = indices_t();
}
void checkSortedVector(samples_t& X_, indices_t indices_)
{
X = X_;
indices = indices_;
indices_t testSortedIndices = sortIndices(X);
precision_t prev = X[testSortedIndices[0]];
for (auto i = 0; i < X.size(); ++i) {
EXPECT_EQ(testSortedIndices[i], indices[i]);
EXPECT_LE(prev, X[testSortedIndices[i]]);
prev = X[testSortedIndices[i]];
}
}
void checkCutPoints(cutPoints_t& expected)
{
int expectedSize = expected.size();
EXPECT_EQ(cutPoints.size(), expectedSize);
for (auto i = 0; i < expectedSize; i++) {
EXPECT_EQ(cutPoints[i], expected[i]);
}
}
template<typename T, typename A>
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
{
EXPECT_EQ(expected.size(), computed.size());
for (auto i = 0; i < expected.size(); i++) {
EXPECT_EQ(expected[i], computed[i]);
}
}
};
// Fitting an empty dataset is expected to throw std::invalid_argument.
TEST_F(TestFImdlp, FitErrorEmptyDataset)
{
    X.clear();
    y.clear();
    EXPECT_THROW(fit(X, y), std::invalid_argument);
}
}
//
// TEST_F(TestFImdlp, FitErrorDifferentSize)
// {
// X = { 1, 2, 3 };
// y = { 1, 2 };
// EXPECT_THROW(fit(X, y), std::invalid_argument);
// }
// TEST_F(TestFImdlp, SortIndices)
// {
// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
// indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
// checkSortedVector(X, indices);
// X = { 5.77, 5.88, 5.99 };
// indices = { 0, 1, 2 };
// checkSortedVector(X, indices);
// X = { 5.33, 5.22, 5.11 };
// indices = { 2, 1, 0 };
// checkSortedVector(X, indices);
// }
// TEST_F(TestFImdlp, EvaluateCutPoint)
// {
// cutPoint_t rest, candidate;
// rest = { 0, 10, -1, -1, 1000 };
// candidate = { 0, 4, -1, -1, 5.15 };
// EXPECT_FALSE(evaluateCutPoint(rest, candidate));
// }
// TEST_F(TestFImdlp, ComputeCutPointsOriginal)
// {
// cutPoints_t expected;
// expected = {
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
// };
// setCutPoints(cutPoints_t());
// computeCutPointsOriginal();
// checkCutPoints(expected);
// }
// TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
// {
// cutPoints_t expected;
// expected = {
// { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
// };
// X = { 0, 1, 2, 2 };
// y = { 1, 1, 1, 2 };
// fit(X, y);
// computeCutPointsOriginal();
// checkCutPoints(expected);
// }
// TEST_F(TestFImdlp, ComputeCutPointsProposal)
// {
// cutPoints_t expected;
// expected = {
// { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
// { 6, 9, -1, 5.4, 5.85 },
// { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
// };
// computeCutPointsProposal();
// checkCutPoints(expected);
// }
// TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
// {
// cutPoints_t expected;
// expected = {
// { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
// { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
// };
// X = { 0, 1, 2, 2 };
// y = { 1, 1, 1, 2 };
// fit(X, y);
// computeCutPointsProposal();
// checkCutPoints(expected);
// }
// TEST_F(TestFImdlp, DiscretizedValues)
// {
// labels_t computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
// computed = getDiscretizedValues();
// checkVectors(expected, computed);
// }
// TEST_F(TestFImdlp, GetCutPoints)
// {
// samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 };
// computeCutPointsOriginal();
// computed = getCutPoints();
// checkVectors(expected, computed);
// }
// TEST_F(TestFImdlp, Constructor)
// {
// samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
// labels_t y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
// setProposal(false);
// fit(X, y);
// computeCutPointsOriginal();
// cutPoints_t expected;
// vector<precision_t> computed = getCutPoints();
// expected = {
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
// };
// computed = getCutPoints();
// int expectedSize = expected.size();
// EXPECT_EQ(computed.size(), expected.size());
// for (auto i = 0; i < expectedSize; i++) {
// EXPECT_NEAR(computed[i], expected[i].toValue, .00000001);
// }
// }
//}

View File

@@ -1,43 +0,0 @@
#include "gtest/gtest.h"
#include "../Metrics.h"
namespace mdlp {
class TestMetrics: public Metrics, public testing::Test {
public:
labels_t y;
samples_t X;
indices_t indices;
precision_t precision = 0.000001;
TestMetrics(): Metrics(y, indices) {}
void SetUp()
{
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
setData(y, indices);
}
};
// computeNumClasses counts the distinct labels inside the requested range.
TEST_F(TestMetrics, NumClasses)
{
    // Only position 8 holds the second label
    y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
    const int inUniformRange = computeNumClasses(4, 8);
    const int inWholeRange = computeNumClasses(0, 10);
    const int inTail = computeNumClasses(8, 10);
    EXPECT_EQ(1, inUniformRange);
    EXPECT_EQ(2, inWholeRange);
    EXPECT_EQ(2, inTail);
}
// Entropy is 1 for two balanced classes, 0 for a single class, and takes the
// expected value for a 9 to 1 class split.
TEST_F(TestMetrics, Entropy)
{
    const precision_t balanced = entropy(0, 10);
    const precision_t pure = entropy(0, 5);
    EXPECT_EQ(1, balanced);
    EXPECT_EQ(0, pure);
    y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
    setData(y, indices);
    const precision_t skewed = entropy(0, 10);
    ASSERT_NEAR(0.468996, skewed, precision);
}
// Cutting the balanced dataset in the middle separates both classes (gain 1);
// the same cut on the 9 to 1 dataset gains much less.
TEST_F(TestMetrics, InformationGain)
{
    const precision_t perfectSplit = informationGain(0, 5, 10);
    ASSERT_NEAR(1, perfectSplit, precision);
    y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
    setData(y, indices);
    const precision_t weakSplit = informationGain(0, 5, 10);
    ASSERT_NEAR(0.108032, weakSplit, precision);
}
}

View File

@@ -1,4 +0,0 @@
# Regenerates the HTML coverage report of the C++ tests with lcov / genhtml.
# Make sure the output directory exists: lcov cannot write its output file
# into a missing directory (e.g. on a fresh checkout)
mkdir -p lcoverage
# Drop the previous report
rm -fr lcoverage/*
lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
genhtml lcoverage/main_coverage.info --output-directory lcoverage
# Show the report
open lcoverage/index.html

View File

@@ -1,225 +0,0 @@
% 1. Title: Iris Plants Database
%
% 2. Sources:
% (a) Creator: R.A. Fisher
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
% (c) Date: July, 1988
%
% 3. Past Usage:
% - Publications: too many to mention!!! Here are a few.
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
% Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
% to Mathematical Statistics" (John Wiley, NY, 1950).
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
% Structure and Classification Rule for Recognition in Partially Exposed
% Environments". IEEE Transactions on Pattern Analysis and Machine
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
% -- Results:
% -- very low misclassification rates (0% for the setosa class)
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
% Transactions on Information Theory, May 1972, 431-433.
% -- Results:
% -- very low misclassification rates again
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
% conceptual clustering system finds 3 classes in the data.
%
% 4. Relevant Information:
% --- This is perhaps the best known database to be found in the pattern
% recognition literature. Fisher's paper is a classic in the field
% and is referenced frequently to this day. (See Duda & Hart, for
% example.) The data set contains 3 classes of 50 instances each,
% where each class refers to a type of iris plant. One class is
% linearly separable from the other 2; the latter are NOT linearly
% separable from each other.
% --- Predicted attribute: class of iris plant.
% --- This is an exceedingly simple domain.
%
% 5. Number of Instances: 150 (50 in each of three classes)
%
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
%
% 7. Attribute Information:
% 1. sepal length in cm
% 2. sepal width in cm
% 3. petal length in cm
% 4. petal width in cm
% 5. class:
% -- Iris Setosa
% -- Iris Versicolour
% -- Iris Virginica
%
% 8. Missing Attribute Values: None
%
% Summary Statistics:
% Min Max Mean SD Class Correlation
% sepal length: 4.3 7.9 5.84 0.83 0.7826
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
%
% 9. Class Distribution: 33.3% for each of 3 classes.
@RELATION iris
@ATTRIBUTE sepallength REAL
@ATTRIBUTE sepalwidth REAL
@ATTRIBUTE petallength REAL
@ATTRIBUTE petalwidth REAL
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
@DATA
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
%
%
%

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +0,0 @@
# Configures, builds and runs the C++ unit tests; stops with exit code 1 as
# soon as the configure or the build step fails.
if ! cmake -S . -B build -Wno-dev; then
    echo "Error in creating build commands."
    exit 1
fi
if ! cmake --build build; then
    echo "Error in build command."
    exit 1
fi
# ctest has to run from the build tree
cd build
ctest --output-on-failure

View File

@@ -1,117 +0,0 @@
#include "ArffFiles.h"
#include <fstream>
#include <sstream>
#include <map>
#include <iostream>
using namespace std;
// Creates an empty reader; nothing is loaded until load() is called.
ArffFiles::ArffFiles()
{
}
// Returns a copy of the raw data lines kept by load().
vector<string> ArffFiles::getLines()
{
return lines;
}
// Returns the number of data lines kept by load().
unsigned long int ArffFiles::getSize()
{
return lines.size();
}
// Returns a copy of the (name, type) pairs of the attributes; load() removes
// the class attribute from this list.
vector<tuple<string, string>> ArffFiles::getAttributes()
{
return attributes;
}
// Returns the name of the class attribute.
string ArffFiles::getClassName()
{
return className;
}
// Returns the type text of the class attribute.
string ArffFiles::getClassType()
{
return classType;
}
// Returns a reference to the feature matrix, stored as one vector per
// attribute with one value per data line.
vector<vector<float>>& ArffFiles::getX()
{
return X;
}
// Returns a reference to the class labels encoded as integers (see factorize()).
vector<int>& ArffFiles::getY()
{
return y;
}
// Reads an ARFF file: collects the attribute declarations and the data lines,
// separates the class attribute (the last one when classLast is true, the
// first one otherwise) and builds the dataset.
// Throws invalid_argument when the file cannot be opened or declares no attributes.
void ArffFiles::load(string fileName, bool classLast)
{
    ifstream file(fileName);
    if (!file.is_open())
        throw invalid_argument("Unable to open file");
    // Start from a clean state so the same object can load several files:
    // without this a second load() appended to the previous lines and attributes
    lines.clear();
    attributes.clear();
    string keyword, attribute, type;
    string line;
    while (getline(file, line)) {
        // Skip blank lines and comments
        if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
            continue;
        }
        if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
            stringstream ss(line);
            ss >> keyword >> attribute >> type;
            attributes.push_back(make_tuple(attribute, type));
            continue;
        }
        // Any other header line (@relation, @data, ...) is ignored
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty())
        throw invalid_argument("No attributes found");
    // Take the class attribute out of the attribute list
    if (classLast) {
        className = get<0>(attributes.back());
        classType = get<1>(attributes.back());
        attributes.pop_back();
    } else {
        className = get<0>(attributes.front());
        classType = get<1>(attributes.front());
        attributes.erase(attributes.begin());
    }
    generateDataset(classLast);
}
void ArffFiles::generateDataset(bool classLast)
{
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
vector<string> yy = vector<string>(lines.size(), "");
int labelIndex = classLast ? attributes.size() : 0;
for (int i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]);
string value;
int pos = 0, xIndex = 0;
while (getline(ss, value, ',')) {
if (pos++ == labelIndex) {
yy[i] = value;
} else {
X[xIndex++][i] = stof(value);
}
}
}
y = factorize(yy);
}
// Returns a copy of source without leading and trailing blanks
// (space, line feed, carriage return and tab).
string ArffFiles::trim(const string& source)
{
    const char* blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        // Nothing but blanks (or empty)
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
// Encodes the labels as integers: every distinct label gets the next free
// code (0, 1, 2, ...) in order of first appearance.
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    map<string, int> codes;
    vector<int> encoded;
    encoded.reserve(labels_t.size());
    for (const string& label : labels_t) {
        // emplace keeps the existing code when the label was already seen;
        // otherwise the new code is the number of labels seen so far
        auto entry = codes.emplace(label, static_cast<int>(codes.size()));
        encoded.push_back(entry.first->second);
    }
    return encoded;
}

View File

@@ -1,28 +0,0 @@
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
#include <tuple>
using namespace std;
// Minimal reader of ARFF files: keeps the attribute declarations, the raw data
// lines and the dataset built from them.
class ArffFiles {
private:
// Raw data lines of the file (comments, blank lines and header lines excluded)
vector<string> lines;
// (name, type) of every attribute except the class attribute
vector<tuple<string, string>> attributes;
string className, classType;
// Feature values: one vector per attribute, one value per data line
vector<vector<float>> X;
// Class labels encoded as integers
vector<int> y;
// Fills X and y from lines; the flag tells whether the class is the last field
void generateDataset(bool);
public:
ArffFiles();
// Loads a file; the flag tells whether the class is the last attribute (default) or the first
void load(string, bool = true);
vector<string> getLines();
unsigned long int getSize();
string getClassName();
string getClassType();
// Returns a copy of the string without leading / trailing blanks
string trim(const string&);
vector<vector<float>>& getX();
vector<int>& getY();
vector<tuple<string, string>> getAttributes();
// Maps every distinct label to an integer code in order of first appearance
vector<int> factorize(const vector<string>& labels_t);
};
#endif

View File

@@ -1,6 +0,0 @@
# Builds the sample program that loads an ARFF dataset and prints the cut
# points of each of its features.
cmake_minimum_required(VERSION 3.24)
project(main)
set(CMAKE_CXX_STANDARD 17)
# The discretizer sources live two directories up and are compiled into the executable
add_executable(main main.cpp ArffFiles.cpp ../../Metrics.cpp ../../CPPFImdlp.cpp)

View File

@@ -1,54 +0,0 @@
#include "ArffFiles.h"
#include <iostream>
#include <vector>
#include <iomanip>
#include "../../CPPFImdlp.h"
using namespace std;
// Sample program: loads one of the known ARFF datasets (chosen by its name on
// the command line), prints its description and first rows, and then the cut
// points computed for each feature.
// Returns 1 when the dataset name is missing or unknown.
int main(int argc, char** argv)
{
    ArffFiles file;
    string path = "/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/";
    // Dataset name -> whether the class is the last attribute of the file
    map<string, bool > datasets = {
        {"mfeat-factors", true},
        {"iris", true},
        {"letter", true},
        {"kdd_JapaneseVowels", false}
    };
    if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
        cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
        return 1;
    }
    file.load(path + argv[1] + ".arff", datasets[argv[1]]);
    auto attributes = file.getAttributes();
    int items = file.getSize();
    cout << "Number of lines: " << items << endl;
    cout << "Attributes: " << endl;
    for (const auto& attribute : attributes) {
        cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
    }
    cout << "Class name: " << file.getClassName() << endl;
    cout << "Class type: " << file.getClassType() << endl;
    cout << "Data: " << endl;
    vector<vector<float>>& X = file.getX();
    vector<int>& y = file.getY();
    // Preview at most the first 50 rows; a smaller dataset used to be read
    // past its end
    const int preview = items < 50 ? items : 50;
    for (int i = 0; i < preview; i++) {
        // Iterate by reference: each feature is a whole column of the dataset
        for (const auto& feature : X) {
            cout << fixed << setprecision(1) << feature[i] << " ";
        }
        cout << y[i] << endl;
    }
    // CPPFImdlp.h only declares CPPFImdlp(bool), so the argument is required.
    // NOTE(review): false is the value the unit test fixture uses - confirm it
    // is the intended mode for this sample
    mdlp::CPPFImdlp test(false);
    for (size_t i = 0; i < attributes.size(); i++) {
        cout << "Cut points for " << get<0>(attributes[i]) << endl;
        cout << "--------------------------" << setprecision(3) << endl;
        test.fit(X[i], y);
        for (auto item : test.getCutPoints()) {
            cout << item << endl;
        }
    }
    return 0;
}

View File

@@ -1,16 +0,0 @@
#ifndef TYPES_H
#define TYPES_H
#include <vector>
#include <map>
using namespace std;
// Type aliases shared by the discretizer and its metrics.
namespace mdlp {
    // Floating point type of the samples and of every computed value
    using precision_t = float;
    // Values of one feature
    using samples_t = std::vector<precision_t>;
    // Class labels, one per sample
    using labels_t = std::vector<int>;
    // Positions inside the samples / labels vectors
    using indices_t = std::vector<size_t>;
    // Cut points of one feature
    using cutPoints_t = std::vector<precision_t>;
    // Cache of entropies keyed by (start, end)
    using cacheEnt_t = std::map<std::tuple<int, int>, precision_t>;
    // Cache of information gains keyed by (start, cut, end)
    using cacheIg_t = std::map<std::tuple<int, int, int>, precision_t>;
}
#endif

View File

@@ -12,8 +12,8 @@ setup(
name="cppfimdlp",
sources=[
"fimdlp/cfimdlp.pyx",
"fimdlp/CPPFImdlp.cpp",
"fimdlp/Metrics.cpp",
"fimdlp/cppmdlp/CPPFImdlp.cpp",
"fimdlp/cppmdlp/Metrics.cpp",
],
language="c++",
include_dirs=["fimdlp"],