mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
Add mdlp c++ submodule and reconfigure
This commit is contained in:
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "fimdlp/cppmdlp"]
|
||||
path = fimdlp/cppmdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
4
Makefile
4
Makefile
@@ -11,12 +11,8 @@ clean: ## Clean up
|
||||
|
||||
test:
|
||||
coverage run -m unittest -v fimdlp.tests
|
||||
cd fimdlp/testcpp && ./test
|
||||
|
||||
coverage:
|
||||
if [ -d fimdlp/testcpp/build/CMakeFiles ]; then rm -fr fimdlp/testcpp/build/CMakeFiles/* ; fi;
|
||||
make test
|
||||
cd fimdlp/testcpp && ./cover
|
||||
coverage report -m
|
||||
|
||||
lint: ## Lint and static-check
|
||||
|
Binary file not shown.
273
debug.cpp
273
debug.cpp
@@ -1,273 +0,0 @@
|
||||
std::cout << "+++++++++++++++++++++++" << std::endl;
|
||||
for (size_t i = 0; i < y.size(); i++) {
|
||||
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
|
||||
}
|
||||
std::cout << "+++++++++++++++++++++++" << std::endl;
|
||||
|
||||
std::cout << "Information Gain:" << std::endl;
|
||||
auto nc = Metrics::numClasses(y, indices, 0, indices.size());
|
||||
for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint) {
|
||||
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
|
||||
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
|
||||
}
|
||||
|
||||
def test(self):
|
||||
print("Calculating cut points in python for first feature")
|
||||
yz = self.y_.copy()
|
||||
xz = X[:, 0].copy()
|
||||
xz = xz[np.argsort(X[:, 0])]
|
||||
yz = yz[np.argsort(X[:, 0])]
|
||||
cuts = []
|
||||
for i in range(1, len(yz)):
|
||||
if yz[i] != yz[i - 1] and xz[i - 1] < xz[i] :
|
||||
print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
|
||||
cuts.append((xz[i] + xz[i - 1]) / 2)
|
||||
print("Cuts calculados en python: ", cuts)
|
||||
print("-- Cuts calculados en C++ --")
|
||||
print("Cut points for each feature in Iris dataset:")
|
||||
for i in range(0, 1):
|
||||
# datax = self.X_[np.argsort(self.X_[:, i]), i]
|
||||
# y_ = self.y_[np.argsort(self.X_[:, i])]
|
||||
datax = self.X_[:, i]
|
||||
y_ = self.y_
|
||||
self.discretizer_.fit(datax, y_)
|
||||
Xcutpoints = self.discretizer_.get_cut_points()
|
||||
print(
|
||||
f"New ({len(Xcutpoints)}):{self.features_[i]:20s}: "
|
||||
f"{[i['toValue'] for i in Xcutpoints]}"
|
||||
)
|
||||
X_translated = [
|
||||
f"{i['classNumber']} - ({i['start']}, {i['end']}) - "
|
||||
f"({i['fromValue']}, {i['toValue']})"
|
||||
for i in Xcutpoints
|
||||
]
|
||||
print(X_translated)
|
||||
print("*******************************")
|
||||
print("Disretized values:")
|
||||
print(self.discretizer_.get_discretized_values())
|
||||
print("*******************************")
|
||||
return X
|
||||
|
||||
c++
|
||||
i: 0 4.3, 0
|
||||
i : 1 4.4, 0
|
||||
i : 2 4.4, 0
|
||||
i : 3 4.4, 0
|
||||
i : 4 4.5, 0
|
||||
i : 5 4.6, 0
|
||||
i : 6 4.6, 0
|
||||
i : 7 4.6, 0
|
||||
i : 8 4.6, 0
|
||||
i : 9 4.7, 0
|
||||
i : 10 4.7, 0
|
||||
i : 11 4.8, 0
|
||||
i : 12 4.8, 0
|
||||
i : 13 4.8, 0
|
||||
i : 14 4.8, 0
|
||||
i : 15 4.8, 0
|
||||
i : 16 4.9, 0
|
||||
i : 17 4.9, 0
|
||||
i : 18 4.9, 0
|
||||
i : 19 4.9, 0
|
||||
i : 20 4.9, 1
|
||||
|
||||
python
|
||||
i : 0 4.3 0
|
||||
i : 1 4.4 0
|
||||
i : 2 4.4 0
|
||||
i : 3 4.4 0
|
||||
i : 4 4.5 0
|
||||
i : 5 4.6 0
|
||||
i : 6 4.6 0
|
||||
i : 7 4.6 0
|
||||
i : 8 4.6 0
|
||||
i : 9 4.7 0
|
||||
i : 10 4.7 0
|
||||
i : 11 4.8 0
|
||||
i : 12 4.8 0
|
||||
i : 13 4.8 0
|
||||
i : 14 4.8 0
|
||||
i : 15 4.8 0
|
||||
i : 16 4.9 1
|
||||
i : 17 4.9 2
|
||||
i : 18 4.9 0
|
||||
i : 19 4.9 0
|
||||
i : 20 4.9 0
|
||||
|
||||
|
||||
|
||||
idx: 20 entropy_left : 0 entropy_right : 0.488187 -> 0 150
|
||||
idx : 21 entropy_left : 0.0670374 entropy_right : 0.489381 -> 0 150
|
||||
idx : 22 entropy_left : 0.125003 entropy_right : 0.490573 -> 0 150
|
||||
idx : 24 entropy_left : 0.11507 entropy_right : 0.482206 -> 0 150
|
||||
idx : 25 entropy_left : 0.162294 entropy_right : 0.483488 -> 0 150
|
||||
idx : 29 entropy_left : 0.141244 entropy_right : 0.462922 -> 0 150
|
||||
idx : 30 entropy_left : 0.178924 entropy_right : 0.464386 -> 0 150
|
||||
idx : 33 entropy_left : 0.163818 entropy_right : 0.444778 -> 0 150
|
||||
idx : 34 entropy_left : 0.195735 entropy_right : 0.44637 -> 0 150
|
||||
idx : 44 entropy_left : 0.154253 entropy_right : 0.339183 -> 0 150
|
||||
idx : 45 entropy_left : 0.178924 entropy_right : 0.34098 -> 0 150
|
||||
idx : 51 entropy_left : 0.159328 entropy_right : 0.217547 -> 0 150
|
||||
idx : 52 entropy_left : 0.180508 entropy_right : 0.219019 -> 0 150
|
||||
idx : 53 entropy_left : 0.177368 entropy_right : 0.189687 -> 0 150
|
||||
idx : 58 entropy_left : 0.265229 entropy_right : 0.196677 -> 0 150
|
||||
idx : 59 entropy_left : 0.261331 entropy_right : 0.162291 -> 0 150
|
||||
idx : 61 entropy_left : 0.289819 entropy_right : 0.164857 -> 0 150
|
||||
idx : 62 entropy_left : 0.302928 entropy_right : 0.166175 -> 0 150
|
||||
idx : 68 entropy_left : 0.36831 entropy_right : 0.174607 -> 0 150
|
||||
idx : 69 entropy_left : 0.364217 entropy_right : 0.131848 -> 0 150
|
||||
idx : 70 entropy_left : 0.373248 entropy_right : 0.133048 -> 0 150
|
||||
idx : 71 entropy_left : 0.381826 entropy_right : 0.134273 -> 0 150
|
||||
idx : 72 entropy_left : 0.377855 entropy_right : 0.0805821 -> 0 150
|
||||
idx : 74 entropy_left : 0.393817 entropy_right : 0.0822096 -> 0 150
|
||||
idx : 75 entropy_left : 0.401218 entropy_right : 0.0830509 -> 0 150
|
||||
idx : 76 entropy_left : 0.397415 entropy_right : 0 -> 0 150
|
||||
idx : 77 entropy_left : 0.4045 entropy_right : 0 -> 0 150
|
||||
idx : 78 entropy_left : 0.411247 entropy_right : 0 -> 0 150
|
||||
idx : 79 entropy_left : 0.417674 entropy_right : 0 -> 0 150
|
||||
idx : 81 entropy_left : 0.429626 entropy_right : 0 -> 0 150
|
||||
idx : 83 entropy_left : 0.440472 entropy_right : 0 -> 0 150
|
||||
idx : 84 entropy_left : 0.445513 entropy_right : 0 -> 0 150
|
||||
idx : 87 entropy_left : 0.459246 entropy_right : 0 -> 0 150
|
||||
idx : 88 entropy_left : 0.463395 entropy_right : 0 -> 0 150
|
||||
idx : 89 entropy_left : 0.467347 entropy_right : 0 -> 0 150
|
||||
idx : 91 entropy_left : 0.474691 entropy_right : 0 -> 0 150
|
||||
idx : 95 entropy_left : 0.487368 entropy_right : 0 -> 0 150
|
||||
idx : 97 entropy_left : 0.492813 entropy_right : 0 -> 0 150
|
||||
idx : 99 entropy_left : 0.497728 entropy_right : 0 -> 0 150
|
||||
idx : 101 entropy_left : 0.502156 entropy_right : 0 -> 0 150
|
||||
idx : 102 entropy_left : 0.504201 entropy_right : 0 -> 0 150
|
||||
idx : 104 entropy_left : 0.507973 entropy_right : 0 -> 0 150
|
||||
idx : 105 entropy_left : 0.509709 entropy_right : 0 -> 0 150
|
||||
idx : 106 entropy_left : 0.511351 entropy_right : 0 -> 0 150
|
||||
idx : 107 entropy_left : 0.512902 entropy_right : 0 -> 0 150
|
||||
idx : 109 entropy_left : 0.515747 entropy_right : 0 -> 0 150
|
||||
idx : 110 entropy_left : 0.517047 entropy_right : 0 -> 0 150
|
||||
idx : 113 entropy_left : 0.520497 entropy_right : 0 -> 0 150
|
||||
idx : 114 entropy_left : 0.521506 entropy_right : 0 -> 0 150
|
||||
idx : 117 entropy_left : 0.524149 entropy_right : 0 -> 0 150
|
||||
idx : 118 entropy_left : 0.52491 entropy_right : 0 -> 0 150
|
||||
idx : 120 entropy_left : 0.526264 entropy_right : 0 -> 0 150
|
||||
idx : 122 entropy_left : 0.52741 entropy_right : 0 -> 0 150
|
||||
idx : 127 entropy_left : 0.52946 entropy_right : 0 -> 0 150
|
||||
idx : 130 entropy_left : 0.530197 entropy_right : 0 -> 0 150
|
||||
idx : 132 entropy_left : 0.530507 entropy_right : 0 -> 0 150
|
||||
idx : 133 entropy_left : 0.530611 entropy_right : 0 -> 0 150
|
||||
idx : 134 entropy_left : 0.530684 entropy_right : 0 -> 0 150
|
||||
idx : 135 entropy_left : 0.530726 entropy_right : 0 -> 0 150
|
||||
idx : 137 entropy_left : 0.530721 entropy_right : 0 -> 0 150
|
||||
idx : 138 entropy_left : 0.530677 entropy_right : 0 -> 0 150
|
||||
cut : 5.5 index : 53
|
||||
start : 0 cut : 53 end : 150
|
||||
k = 3 k1 = 3 k2 = 3 ent = 0.528321 ent1 = 0.177368 ent2 = 0.189687
|
||||
ig = 0.342987 delta = 4.16006 N 150 term 0.0758615
|
||||
¡Ding!5.5 53
|
||||
|
||||
|
||||
idx : 20 entropy_left : 0 entropy_right : 1.5485806065228545 -> 0 150
|
||||
idx : 21 entropy_left : 0.2761954276479391 entropy_right : 1.549829505666378 -> 0 150
|
||||
idx : 22 entropy_left : 0.5304060778306042 entropy_right : 1.5511852922535474 -> 0 150
|
||||
idx : 24 entropy_left : 0.4971501836369671 entropy_right : 1.5419822842863982 -> 0 150
|
||||
idx : 25 entropy_left : 0.6395563653739031 entropy_right : 1.5433449229510985 -> 0 150
|
||||
idx : 29 entropy_left : 0.574828144380386 entropy_right : 1.5202013991459298 -> 0 150
|
||||
idx : 30 entropy_left : 0.6746799231474564 entropy_right : 1.521677608876836 -> 0 150
|
||||
idx : 33 entropy_left : 0.6311718053929063 entropy_right : 1.4992098113026513 -> 0 150
|
||||
idx : 34 entropy_left : 0.7085966983474103 entropy_right : 1.5007111828980744 -> 0 150
|
||||
idx : 44 entropy_left : 0.5928251064639408 entropy_right : 1.3764263022492553 -> 0 150
|
||||
idx : 45 entropy_left : 0.6531791627726858 entropy_right : 1.3779796176519241 -> 0 150
|
||||
idx : 51 entropy_left : 0.5990326006132177 entropy_right : 1.2367928607774141 -> 0 150
|
||||
idx : 52 entropy_left : 0.6496096346956632 entropy_right : 1.2377158231343603 -> 0 150
|
||||
idx : 53 entropy_left : 0.6412482850735854 entropy_right : 1.2046986815511866 -> 0 150
|
||||
idx : 58 entropy_left : 0.8211258609270055 entropy_right : 1.2056112071736118 -> 0 150
|
||||
idx : 59 entropy_left : 0.8128223064150747 entropy_right : 1.167065448996099 -> 0 150
|
||||
idx : 61 entropy_left : 0.8623538561746379 entropy_right : 1.1653351793699953 -> 0 150
|
||||
idx : 62 entropy_left : 0.9353028851500502 entropy_right : 1.1687172769890006 -> 0 150
|
||||
idx : 68 entropy_left : 1.031929035599206 entropy_right : 1.1573913563403753 -> 0 150
|
||||
idx : 69 entropy_left : 1.0246284743137688 entropy_right : 1.109500797247481 -> 0 150
|
||||
idx : 70 entropy_left : 1.036186417911213 entropy_right : 1.105866621101474 -> 0 150
|
||||
idx : 71 entropy_left : 1.0895830429620594 entropy_right : 1.1104593064416028 -> 0 150
|
||||
idx : 72 entropy_left : 1.0822273380873693 entropy_right : 1.0511407586429597 -> 0 150
|
||||
idx : 74 entropy_left : 1.1015727511177442 entropy_right : 1.041722068095403 -> 0 150
|
||||
idx : 75 entropy_left : 1.1457749842070042 entropy_right : 1.0462881865460743 -> 0 150
|
||||
idx : 76 entropy_left : 1.1387129726704701 entropy_right : 0.9568886656798212 -> 0 150
|
||||
idx : 77 entropy_left : 1.1468549240968817 entropy_right : 0.9505668528932196 -> 0 150
|
||||
idx : 78 entropy_left : 1.1848333092150132 entropy_right : 0.9544340029249649 -> 0 150
|
||||
idx : 79 entropy_left : 1.1918623939938016 entropy_right : 0.9477073729342066 -> 0 150
|
||||
idx : 81 entropy_left : 1.2548698305334247 entropy_right : 0.9557589912150009 -> 0 150
|
||||
idx : 83 entropy_left : 1.2659342914094807 entropy_right : 0.9411864371816835 -> 0 150
|
||||
idx : 84 entropy_left : 1.2922669208691815 entropy_right : 0.9456603046006402 -> 0 150
|
||||
idx : 87 entropy_left : 1.3041589171425696 entropy_right : 0.9182958340544896 -> 0 150
|
||||
idx : 88 entropy_left : 1.327572716814381 entropy_right : 0.9235785996175947 -> 0 150
|
||||
idx : 89 entropy_left : 1.330465426809402 entropy_right : 0.9127341558073343 -> 0 150
|
||||
idx : 91 entropy_left : 1.3709454625942779 entropy_right : 0.9238422284571814 -> 0 150
|
||||
idx : 95 entropy_left : 1.378063041001916 entropy_right : 0.8698926856041563 -> 0 150
|
||||
idx : 97 entropy_left : 1.4115390027326744 entropy_right : 0.8835850861052532 -> 0 150
|
||||
idx : 99 entropy_left : 1.4130351465796736 entropy_right : 0.8478617451660526 -> 0 150
|
||||
idx : 101 entropy_left : 1.4412464483479606 entropy_right : 0.863120568566631 -> 0 150
|
||||
idx : 102 entropy_left : 1.4415827640191903 entropy_right : 0.8426578772022391 -> 0 150
|
||||
idx : 104 entropy_left : 1.4655411381577925 entropy_right : 0.8589810370425963 -> 0 150
|
||||
idx : 105 entropy_left : 1.465665295753282 entropy_right : 0.8366407419411673 -> 0 150
|
||||
idx : 106 entropy_left : 1.4762911618692924 entropy_right : 0.8453509366224365 -> 0 150
|
||||
idx : 107 entropy_left : 1.4762132849962355 entropy_right : 0.8203636429576732 -> 0 150
|
||||
idx : 109 entropy_left : 1.4951379218217782 entropy_right : 0.8390040613676977 -> 0 150
|
||||
idx : 110 entropy_left : 1.4949188482339508 entropy_right : 0.8112781244591328 -> 0 150
|
||||
idx : 113 entropy_left : 1.5183041104369397 entropy_right : 0.8418521897563207 -> 0 150
|
||||
idx : 114 entropy_left : 1.51802714866133 entropy_right : 0.8112781244591328 -> 0 150
|
||||
idx : 117 entropy_left : 1.5364854516368571 entropy_right : 0.8453509366224365 -> 0 150
|
||||
idx : 118 entropy_left : 1.5361890331151247 entropy_right : 0.8112781244591328 -> 0 150
|
||||
idx : 120 entropy_left : 1.5462566034163763 entropy_right : 0.8366407419411673 -> 0 150
|
||||
idx : 122 entropy_left : 1.545378825051491 entropy_right : 0.74959525725948 -> 0 150
|
||||
idx : 127 entropy_left : 1.5644893588382582 entropy_right : 0.828055725379504 -> 0 150
|
||||
idx : 130 entropy_left : 1.562956340286807 entropy_right : 0.6098403047164004 -> 0 150
|
||||
idx : 132 entropy_left : 1.5687623685201277 entropy_right : 0.6500224216483541 -> 0 150
|
||||
idx : 133 entropy_left : 1.5680951037987416 entropy_right : 0.5225593745369408 -> 0 150
|
||||
idx : 134 entropy_left : 1.5706540443736308 entropy_right : 0.5435644431995964 -> 0 150
|
||||
idx : 135 entropy_left : 1.5699201014782036 entropy_right : 0.35335933502142136 -> 0 150
|
||||
idx : 137 entropy_left : 1.5744201314186457 entropy_right : 0.39124356362925566 -> 0 150
|
||||
idx : 138 entropy_left : 1.5736921054134685 entropy_right : 0 -> 0 150
|
||||
¡Ding!4.9 20
|
||||
|
||||
k = 2 k1 = 1 k2 = 2 ent = 0.5225593745369408 ent1 = 0 ent2 = 0.5435644431995964
|
||||
ig = 0.010969310349085326 delta = 2.849365059382915 N 17 term 0.4029038270225244
|
||||
idx : 135 entropy_left : 0 entropy_right : 0.35335933502142136 -> 134 150
|
||||
idx : 137 entropy_left : 0.9182958340544896 entropy_right : 0.39124356362925566 -> 134 150
|
||||
idx : 138 entropy_left : 1.0 entropy_right : 0 -> 134 150
|
||||
start : 134 cut : 135 end : 150
|
||||
k = 2 k1 = 1 k2 = 2 ent = 0.5435644431995964 ent1 = 0 ent2 = 0.35335933502142136
|
||||
ig = 0.21229006661701388 delta = 2.426944705701254 N 16 term 0.39586470633186077
|
||||
idx : 137 entropy_left : 0 entropy_right : 0.39124356362925566 -> 135 150
|
||||
idx : 138 entropy_left : 0.9182958340544896 entropy_right : 0 -> 135 150
|
||||
start : 135 cut : 137 end : 150
|
||||
k = 2 k1 = 1 k2 = 2 ent = 0.35335933502142136 ent1 = 0 ent2 = 0.39124356362925566
|
||||
ig = 0.01428157987606643 delta = 2.8831233792732727 N 15 term 0.44603188675539174
|
||||
idx : 138 entropy_left : 0 entropy_right : 0 -> 137 150
|
||||
start : 137 cut : 138 end : 150
|
||||
k = 2 k1 = 1 k2 = 1 ent = 0.39124356362925566 ent1 = 0 ent2 = 0
|
||||
ig = 0.39124356362925566 delta = 2.0248677947990927 N 13 term 0.4315254073477115
|
||||
[[4.9, 5.2, 5.4, 6.75]]
|
||||
|
||||
|
||||
cut : 1.4 index : 81
|
||||
start : 50 cut : 81 end : 96
|
||||
k = 2 k1 = 2 k2 = 1 ent = 0.151097 ent1 = 0.205593 ent2 = 0
|
||||
ig = 0.0125455 delta = 2.91635 N 46 term 0.182787
|
||||
idx : 80 entropy_left : 0 entropy_right : 0 -> 50 81
|
||||
cut : 1.4 index : 80
|
||||
start : 50 cut : 80 end : 81
|
||||
k = 2 k1 = 1 k2 = 1 ent = 0.205593 ent1 = 0 ent2 = 0
|
||||
ig = 0.205593 delta = 2.39617 N 31 term 0.235583
|
||||
idx : 112 entropy_left : 0 entropy_right : 0.175565 -> 103 150
|
||||
idx : 113 entropy_left : 0.468996 entropy_right : 0 -> 103 150
|
||||
cut : 1.8 index : 112
|
||||
start : 103 cut : 112 end : 150
|
||||
k = 2 k1 = 1 k2 = 2 ent = 0.148549 ent1 = 0 ent2 = 0.175565
|
||||
ig = 0.00660326 delta = 2.86139 N 47 term 0.178403
|
||||
idx : 113 entropy_left : 0 entropy_right : 0 -> 112 150
|
||||
cut : 1.8 index : 113
|
||||
start : 112 cut : 113 end : 150
|
||||
k = 2 k1 = 1 k2 = 1 ent = 0.175565 ent1 = 0 ent2 = 0
|
||||
ig = 0.175565 delta = 2.45622 N 38 term 0.201728
|
||||
[[4.900000095367432, 4.949999809265137, 5.0, 5.099999904632568, 5.199999809265137, 5.25, 5.400000095367432, 5.449999809265137,
|
||||
5.5, 5.550000190734863, 5.599999904632568, 5.699999809265137, 5.800000190734863, 5.900000095367432, 5.949999809265137, 6.0, 6.050000190734863,
|
||||
6.099999904632568, 6.149999618530273, 6.199999809265137, 6.25, 6.300000190734863, 6.400000095367432, 6.5, 6.550000190734863, 6.649999618530273, 6.699999809265137,
|
||||
6.75, 6.800000190734863, 6.850000381469727, 6.900000095367432, 6.949999809265137, 7.050000190734863]]
|
@@ -1,183 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.24)
|
||||
project(fimdlp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
include_directories(.)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googlemock/include)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googlemock/test)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/include)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/samples)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/src)
|
||||
include_directories(testcpp/build/_deps/googletest-src/googletest/test)
|
||||
|
||||
add_executable(fimdlp
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-matchers.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/custom/gmock-port.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-internal-utils.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-port.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/internal/gmock-pp.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-actions.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-cardinalities.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-function-mocker.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-matchers.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-actions.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-more-matchers.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-nice-strict.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock-spec-builders.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/include/gmock/gmock.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock-all.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock-cardinalities.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock-internal-utils.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock-matchers.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock-spec-builders.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/src/gmock_main.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-actions_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-cardinalities_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-function-mocker_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-internal-utils_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-arithmetic_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-comparisons_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-containers_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers-misc_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-matchers_test.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-more-actions_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-nice-strict_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-port_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp-string_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-pp_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock-spec-builders_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_all_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_ex_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_leak_test_.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_link2_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_link_test.h
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_output_test_.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_stress_test.cc
|
||||
testcpp/build/_deps/googletest-src/googlemock/test/gmock_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-port.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest-printers.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/custom/gtest.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-death-test-internal.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-filepath.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-internal.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-param-util.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port-arch.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-port.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-string.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/internal/gtest-type-util.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-assertion-result.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-death-test.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-matchers.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-message.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-param-test.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-printers.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-spi.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-test-part.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest-typed-test.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_pred_impl.h
|
||||
testcpp/build/_deps/googletest-src/googletest/include/gtest/gtest_prod.h
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/prime_tables.h
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample1.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample1.h
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample10_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample1_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample2.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample2.h
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample2_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample3-inl.h
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample3_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample4.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample4.h
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample4_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample5_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample6_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample7_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample8_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/samples/sample9_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-all.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-assertion-result.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-death-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-filepath.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-internal-inl.h
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-matchers.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-port.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-printers.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-test-part.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest-typed-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/src/gtest_main.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-break-on-failure-unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-catch-exceptions-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-color-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-death-test_ex_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-env-var-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-failfast-unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-filepath-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-filter-unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-global-environment-unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-list-tests-unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-listener-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-message-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-options-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-output-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name1-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-invalid-name2-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test-test.h
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-param-test2-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-port-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-printers-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-setuptestsuite-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-shuffle-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-test-part-test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-throw-on-failure-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/googletest-uninitialized-test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test2_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest-typed-test_test.h
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest-unittest-api_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_all_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_assert_by_exception_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_environment_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_help_test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_list_output_unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_main_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_no_test_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_pred_impl_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_premature_exit_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_prod_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_repeat_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_in_environment_setup_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_skip_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_sole_header_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_stress_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_test_macro_stack_footprint_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_testbridge_test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_throw_on_failure_ex_test.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_unittest.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile1_test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_outfile2_test_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/gtest_xml_output_unittest_.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/production.cc
|
||||
testcpp/build/_deps/googletest-src/googletest/test/production.h
|
||||
testcpp/CMakeLists.txt
|
||||
testcpp/FImdlp_unittest.cc
|
||||
testcpp/Metrics_unittest.cc
|
||||
cfimdlp.cpp
|
||||
CPPFImdlp.cpp
|
||||
CPPFImdlp.h
|
||||
Metrics.cpp
|
||||
Metrics.h
|
||||
typesFImdlp.h)
|
@@ -1,159 +0,0 @@
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
/**
 * Creates a discretizer with empty index and label buffers.
 *
 * @param proposal when true, fit() runs computeCutPointsProposal();
 *                 otherwise it runs computeCutPoints() over the whole range.
 *
 * NOTE(review): `metrics` is built from `y` and `indices`; members are
 * initialised in declaration order, so confirm in the header that both are
 * declared before `metrics`.
 */
CPPFImdlp::CPPFImdlp(bool proposal)
    : proposal(proposal),
      indices(),
      y(),
      metrics(Metrics(y, indices))
{
}
|
||||
/// Compiler-generated destruction; nothing is released by hand here.
CPPFImdlp::~CPPFImdlp() = default;
|
||||
|
||||
/**
 * Computes the cut points for one feature.
 *
 * Stores copies of the samples and labels, sorts the sample indices with
 * sortIndices(), hands labels and indices to `metrics`, and then runs either
 * computeCutPointsProposal() or computeCutPoints(0, X.size()) depending on
 * the `proposal` flag given at construction.
 *
 * @param X_ sample values of the feature
 * @param y_ class label of each sample; must have the same length as X_
 * @return reference to this object, so calls can be chained
 * @throws invalid_argument if X_ and y_ differ in size or are empty
 */
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
{
    // Validate before touching any member: a rejected call must not leave the
    // object with new X/y but stale indices, metrics and cleared cut points.
    if (X_.size() != y_.size()) {
        throw invalid_argument("X and y must have the same size");
    }
    // Sizes are equal at this point, so checking one of them is enough.
    if (X_.size() == 0) {
        throw invalid_argument("X and y must have at least one element");
    }
    X = X_;
    y = y_;
    cutPoints.clear();
    indices = sortIndices(X_);
    metrics.setData(y, indices);
    if (proposal)
        computeCutPointsProposal();
    else
        computeCutPoints(0, X.size());
    return *this;
}
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||
{
|
||||
int cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == -1 || !mdlp(start, cut, end)) {
|
||||
// cut.value == -1 means that there is no candidate in the interval
|
||||
// No boundary found, so we add both ends of the interval as cutpoints
|
||||
// because they were selected by the algorithm before
|
||||
if (start != 0)
|
||||
cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
|
||||
if (end != X.size())
|
||||
cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
|
||||
return;
|
||||
}
|
||||
computeCutPoints(start, cut);
|
||||
computeCutPoints(cut, end);
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsOriginal(size_t start, size_t end)
|
||||
{
|
||||
precision_t cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == -1)
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
cutPoints.push_back((X[indices[cut]] + X[indices[cut - 1]]) / 2);
|
||||
}
|
||||
computeCutPointsOriginal(start, cut);
|
||||
computeCutPointsOriginal(cut, end);
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsProposal()
|
||||
{
|
||||
precision_t xPrev, xCur, xPivot, cutPoint;
|
||||
int yPrev, yCur, yPivot;
|
||||
size_t idx, numElements, start;
|
||||
|
||||
xCur = xPrev = X[indices[0]];
|
||||
yCur = yPrev = y[indices[0]];
|
||||
numElements = indices.size() - 1;
|
||||
idx = start = 0;
|
||||
while (idx < numElements) {
|
||||
xPivot = xCur;
|
||||
yPivot = yCur;
|
||||
// Read the same values and check class changes
|
||||
do {
|
||||
idx++;
|
||||
xCur = X[indices[idx]];
|
||||
yCur = y[indices[idx]];
|
||||
if (yCur != yPivot && xCur == xPivot) {
|
||||
yPivot = -1;
|
||||
}
|
||||
}
|
||||
while (idx < numElements && xCur == xPivot);
|
||||
// Check if the class changed and there are more than 1 element
|
||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && mdlp(start, idx, indices.size())) {
|
||||
start = idx;
|
||||
cutPoint = (xPrev + xCur) / 2;
|
||||
cutPoints.push_back(cutPoint);
|
||||
}
|
||||
yPrev = yPivot;
|
||||
xPrev = xPivot;
|
||||
}
|
||||
}
|
||||
long int CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
long int candidate = -1, elements = end - start;
|
||||
precision_t entropy_left, entropy_right, minEntropy = numeric_limits<precision_t>::max();
|
||||
for (auto idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boudndaries
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k, k1, k2;
|
||||
precision_t ig, delta;
|
||||
precision_t ent, ent1, ent2;
|
||||
auto N = precision_t(end - start);
|
||||
if (N < 2) {
|
||||
return false;
|
||||
}
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
/**
 * Returns the cut points found by fit(), without duplicates and in ascending order.
 */
cutPoints_t CPPFImdlp::getCutPoints()
{
    // A std::set both removes duplicates and iterates in ascending order, so no
    // extra sort or pre-sized buffer is needed.
    const set<precision_t> unique(cutPoints.begin(), cutPoints.end());
    return cutPoints_t(unique.begin(), unique.end());
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
for (size_t i = 0; i < X_.size(); i++)
|
||||
sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2)
|
||||
{ return X_[i1] < X_[i2]; });
|
||||
return idx;
|
||||
}
|
||||
}
|
@@ -1,33 +0,0 @@
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <utility>
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
bool proposal;
|
||||
indices_t indices; // sorted indices to use with X and y
|
||||
samples_t X;
|
||||
labels_t y;
|
||||
Metrics metrics;
|
||||
cutPoints_t cutPoints;
|
||||
|
||||
static indices_t sortIndices(samples_t&);
|
||||
void computeCutPoints(size_t, size_t);
|
||||
long int getCandidate(size_t, size_t);
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
|
||||
// Original algorithm
|
||||
void computeCutPointsOriginal(size_t, size_t);
|
||||
bool goodCut(size_t, size_t, size_t);
|
||||
void computeCutPointsProposal();
|
||||
|
||||
public:
|
||||
CPPFImdlp(bool);
|
||||
~CPPFImdlp();
|
||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||
samples_t getCutPoints();
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,65 +0,0 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
// Binds the helper to the caller's label and index vectors (both are held by
// reference), counts the distinct classes they currently contain and starts with
// empty entropy and information-gain caches.
Metrics::Metrics(labels_t& y_, indices_t& indices_)
    : y(y_),
      indices(indices_),
      numClasses(computeNumClasses(0, indices.size())),
      entropyCache(),
      igCache()
{}
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
set<int> nClasses;
|
||||
for (auto i = start; i < end; ++i) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return nClasses.size();
|
||||
}
|
||||
/**
 * Replaces the data the metrics are computed on.
 *
 * `y` and `indices` are reference members, so the assignments below overwrite the
 * vectors this object was bound to at construction. The class count is recomputed
 * and both caches are emptied because their entries describe the old data.
 */
void Metrics::setData(labels_t& y_, indices_t& indices_)
{
    entropyCache.clear();
    igCache.clear();
    y = y_;
    indices = indices_;
    numClasses = computeNumClasses(0, indices.size());
}
|
||||
/**
 * Base-2 entropy of the labels found at sorted positions [start, end).
 *
 * Results are cached per (start, end) pair until setData() is called.
 * Intervals with fewer than two samples have entropy 0.
 *
 * @param start first sorted position of the interval
 * @param end one past the last sorted position of the interval
 * @return -sum(p * log2(p)) over the labels present in the interval
 */
precision_t Metrics::entropy(size_t start, size_t end)
{
    if (end - start < 2)
        return 0;
    const cacheEnt_t::key_type key = make_tuple(start, end);
    const auto cached = entropyCache.find(key);
    if (cached != entropyCache.end()) {
        return cached->second;
    }
    // Count per label value. A map works for any label (negative, sparse or
    // larger than the number of classes), where a vector indexed by label would
    // be written out of bounds. Iteration is in ascending label order, the same
    // order a label-indexed vector would be read in.
    map<int, int> counts;
    for (size_t pos = start; pos < end; ++pos) {
        ++counts[y[indices[pos]]];
    }
    const int nElements = end - start;
    precision_t ventropy = 0;
    for (const auto& entry : counts) {
        const precision_t p = (precision_t)entry.second / nElements;
        ventropy -= p * log2(p);
    }
    entropyCache[key] = ventropy;
    return ventropy;
}
|
||||
/**
 * Information gain obtained by splitting the sorted interval [start, end) at `cut`:
 * the entropy of the whole interval minus the size-weighted mean of the entropies
 * of [start, cut) and [cut, end). Results are cached per (start, cut, end).
 */
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
{
    const auto hit = igCache.find(make_tuple(start, cut, end));
    if (hit != igCache.end()) {
        return hit->second;
    }
    const int total = end - start;
    const int leftCount = cut - start;
    const int rightCount = end - cut;
    const precision_t whole = entropy(start, end);
    const precision_t left = entropy(start, cut);
    const precision_t right = entropy(cut, end);
    const precision_t gain = whole - ((precision_t)leftCount * left + (precision_t)rightCount * right) / total;
    igCache[make_tuple(start, cut, end)] = gain;
    return gain;
}
|
||||
|
||||
}
|
@@ -1,20 +0,0 @@
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
#include "typesFImdlp.h"
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
int numClasses;
|
||||
cacheEnt_t entropyCache;
|
||||
cacheIg_t igCache;
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(labels_t&, indices_t&);
|
||||
int computeNumClasses(size_t, size_t);
|
||||
precision_t entropy(size_t, size_t);
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -3,7 +3,7 @@
|
||||
from libcpp.vector cimport vector
|
||||
from libcpp cimport bool
|
||||
|
||||
cdef extern from "CPPFImdlp.h" namespace "mdlp":
|
||||
cdef extern from "cppmdlp/CPPFImdlp.h" namespace "mdlp":
|
||||
ctypedef float precision_t
|
||||
cdef cppclass CPPFImdlp:
|
||||
CPPFImdlp(bool) except +
|
||||
@@ -22,4 +22,4 @@ cdef class CFImdlp:
|
||||
return self
|
||||
def get_cut_points(self):
|
||||
return self.thisptr.getCutPoints()
|
||||
|
||||
|
||||
|
Binary file not shown.
1
fimdlp/cppmdlp
Submodule
1
fimdlp/cppmdlp
Submodule
Submodule fimdlp/cppmdlp added at 7d940171b5
2
fimdlp/testcpp/.gitignore
vendored
2
fimdlp/testcpp/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
build
|
||||
build/*
|
@@ -1,33 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
project(FImdlp)
|
||||
|
||||
# GoogleTest requires at least C++14
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
include(FetchContent)
|
||||
|
||||
include_directories(${GTEST_INCLUDE_DIRS})
|
||||
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||
)
|
||||
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
enable_testing()
|
||||
|
||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cc)
|
||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../Metrics.cpp FImdlp_unittest.cc)
|
||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||
target_link_options(Metrics_unittest PRIVATE --coverage)
|
||||
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
||||
# -fprofile-arcs -ftest-coverage
|
||||
|
||||
include(GoogleTest)
|
||||
gtest_discover_tests(Metrics_unittest)
|
||||
gtest_discover_tests(FImdlp_unittest)
|
||||
|
@@ -1,166 +0,0 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
#include "../CPPFImdlp.h"
|
||||
namespace mdlp {
|
||||
class TestFImdlp: public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
TestFImdlp(): CPPFImdlp(false) {}
|
||||
void SetUp()
|
||||
{
|
||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
fit(X, y);
|
||||
}
|
||||
void setProposal(bool value)
|
||||
{
|
||||
proposal = value;
|
||||
}
|
||||
void initIndices()
|
||||
{
|
||||
indices = indices_t();
|
||||
}
|
||||
void checkSortedVector(samples_t& X_, indices_t indices_)
|
||||
{
|
||||
X = X_;
|
||||
indices = indices_;
|
||||
indices_t testSortedIndices = sortIndices(X);
|
||||
precision_t prev = X[testSortedIndices[0]];
|
||||
for (auto i = 0; i < X.size(); ++i) {
|
||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
prev = X[testSortedIndices[i]];
|
||||
}
|
||||
}
|
||||
void checkCutPoints(cutPoints_t& expected)
|
||||
{
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
for (auto i = 0; i < expectedSize; i++) {
|
||||
EXPECT_EQ(cutPoints[i], expected[i]);
|
||||
}
|
||||
}
|
||||
template<typename T, typename A>
|
||||
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||
{
|
||||
EXPECT_EQ(expected.size(), computed.size());
|
||||
for (auto i = 0; i < expected.size(); i++) {
|
||||
EXPECT_EQ(expected[i], computed[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||
{
|
||||
X = samples_t();
|
||||
y = labels_t();
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
//
|
||||
// TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||
// {
|
||||
// X = { 1, 2, 3 };
|
||||
// y = { 1, 2 };
|
||||
// EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, SortIndices)
|
||||
// {
|
||||
// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
// indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
// checkSortedVector(X, indices);
|
||||
// X = { 5.77, 5.88, 5.99 };
|
||||
// indices = { 0, 1, 2 };
|
||||
// checkSortedVector(X, indices);
|
||||
// X = { 5.33, 5.22, 5.11 };
|
||||
// indices = { 2, 1, 0 };
|
||||
// checkSortedVector(X, indices);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, EvaluateCutPoint)
|
||||
// {
|
||||
// cutPoint_t rest, candidate;
|
||||
// rest = { 0, 10, -1, -1, 1000 };
|
||||
// candidate = { 0, 4, -1, -1, 5.15 };
|
||||
// EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// setCutPoints(cutPoints_t());
|
||||
// computeCutPointsOriginal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||
// };
|
||||
// X = { 0, 1, 2, 2 };
|
||||
// y = { 1, 1, 1, 2 };
|
||||
// fit(X, y);
|
||||
// computeCutPointsOriginal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsProposal)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
||||
// { 6, 9, -1, 5.4, 5.85 },
|
||||
// { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// computeCutPointsProposal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||
// { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// X = { 0, 1, 2, 2 };
|
||||
// y = { 1, 1, 1, 2 };
|
||||
// fit(X, y);
|
||||
// computeCutPointsProposal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, DiscretizedValues)
|
||||
// {
|
||||
// labels_t computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
// computed = getDiscretizedValues();
|
||||
// checkVectors(expected, computed);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, GetCutPoints)
|
||||
// {
|
||||
// samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 };
|
||||
// computeCutPointsOriginal();
|
||||
// computed = getCutPoints();
|
||||
// checkVectors(expected, computed);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, Constructor)
|
||||
// {
|
||||
// samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
// labels_t y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
// setProposal(false);
|
||||
// fit(X, y);
|
||||
// computeCutPointsOriginal();
|
||||
// cutPoints_t expected;
|
||||
// vector<precision_t> computed = getCutPoints();
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// computed = getCutPoints();
|
||||
// int expectedSize = expected.size();
|
||||
// EXPECT_EQ(computed.size(), expected.size());
|
||||
// for (auto i = 0; i < expectedSize; i++) {
|
||||
// EXPECT_NEAR(computed[i], expected[i].toValue, .00000001);
|
||||
// }
|
||||
// }
|
||||
//}
|
@@ -1,43 +0,0 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
|
||||
|
||||
namespace mdlp {
|
||||
class TestMetrics: public Metrics, public testing::Test {
|
||||
public:
|
||||
labels_t y;
|
||||
samples_t X;
|
||||
indices_t indices;
|
||||
precision_t precision = 0.000001;
|
||||
|
||||
TestMetrics(): Metrics(y, indices) {}
|
||||
void SetUp()
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
setData(y, indices);
|
||||
}
|
||||
};
|
||||
TEST_F(TestMetrics, NumClasses)
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
EXPECT_EQ(1, computeNumClasses(4, 8));
|
||||
EXPECT_EQ(2, computeNumClasses(0, 10));
|
||||
EXPECT_EQ(2, computeNumClasses(8, 10));
|
||||
}
|
||||
TEST_F(TestMetrics, Entropy)
|
||||
{
|
||||
EXPECT_EQ(1, entropy(0, 10));
|
||||
EXPECT_EQ(0, entropy(0, 5));
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.468996, entropy(0, 10), precision);
|
||||
}
|
||||
TEST_F(TestMetrics, InformationGain)
|
||||
{
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision);
|
||||
}
|
||||
}
|
@@ -1,4 +0,0 @@
|
||||
rm -fr lcoverage/*
|
||||
lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||
genhtml lcoverage/main_coverage.info --output-directory lcoverage
|
||||
open lcoverage/index.html
|
@@ -1,225 +0,0 @@
|
||||
% 1. Title: Iris Plants Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: R.A. Fisher
|
||||
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
||||
% (c) Date: July, 1988
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% - Publications: too many to mention!!! Here are a few.
|
||||
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
|
||||
% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
|
||||
% to Mathematical Statistics" (John Wiley, NY, 1950).
|
||||
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
|
||||
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
|
||||
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
|
||||
% Structure and Classification Rule for Recognition in Partially Exposed
|
||||
% Environments". IEEE Transactions on Pattern Analysis and Machine
|
||||
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates (0% for the setosa class)
|
||||
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
|
||||
% Transactions on Information Theory, May 1972, 431-433.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates again
|
||||
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
|
||||
% conceptual clustering system finds 3 classes in the data.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% --- This is perhaps the best known database to be found in the pattern
|
||||
% recognition literature. Fisher's paper is a classic in the field
|
||||
% and is referenced frequently to this day. (See Duda & Hart, for
|
||||
% example.) The data set contains 3 classes of 50 instances each,
|
||||
% where each class refers to a type of iris plant. One class is
|
||||
% linearly separable from the other 2; the latter are NOT linearly
|
||||
% separable from each other.
|
||||
% --- Predicted attribute: class of iris plant.
|
||||
% --- This is an exceedingly simple domain.
|
||||
%
|
||||
% 5. Number of Instances: 150 (50 in each of three classes)
|
||||
%
|
||||
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. sepal length in cm
|
||||
% 2. sepal width in cm
|
||||
% 3. petal length in cm
|
||||
% 4. petal width in cm
|
||||
% 5. class:
|
||||
% -- Iris Setosa
|
||||
% -- Iris Versicolour
|
||||
% -- Iris Virginica
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Min Max Mean SD Class Correlation
|
||||
% sepal length: 4.3 7.9 5.84 0.83 0.7826
|
||||
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
|
||||
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
|
||||
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
|
||||
%
|
||||
% 9. Class Distribution: 33.3% for each of 3 classes.
|
||||
|
||||
@RELATION iris
|
||||
|
||||
@ATTRIBUTE sepallength REAL
|
||||
@ATTRIBUTE sepalwidth REAL
|
||||
@ATTRIBUTE petallength REAL
|
||||
@ATTRIBUTE petalwidth REAL
|
||||
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
||||
|
||||
@DATA
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
%
|
||||
%
|
||||
%
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
cmake -S . -B build -Wno-dev
|
||||
if test $? -ne 0; then
|
||||
echo "Error in creating build commands."
|
||||
exit 1
|
||||
fi
|
||||
cmake --build build
|
||||
if test $? -ne 0; then
|
||||
echo "Error in build command."
|
||||
exit 1
|
||||
fi
|
||||
cd build
|
||||
ctest --output-on-failure
|
@@ -1,117 +0,0 @@
|
||||
#include "ArffFiles.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
ArffFiles::ArffFiles()
|
||||
{
|
||||
}
|
||||
vector<string> ArffFiles::getLines()
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
string ArffFiles::getClassName()
|
||||
{
|
||||
return className;
|
||||
}
|
||||
string ArffFiles::getClassType()
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
vector<vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
/**
 * Reads an ARFF file and builds the dataset from it.
 *
 * Comment lines (starting with '%') and blank lines are skipped, attribute
 * declarations are collected as (name, type) pairs, any other '@' line is ignored
 * and the remaining lines are kept as data rows.
 *
 * @param fileName path of the file to read
 * @param classLast true if the class is the last attribute, false if it is the first
 * @throws invalid_argument if the file cannot be opened or declares no attributes
 */
void ArffFiles::load(string fileName, bool classLast)
{
    ifstream file(fileName);
    if (!file.is_open())
        throw invalid_argument("Unable to open file");
    // Start from a clean state so that loading a second file does not append to
    // the rows and attributes of the first one.
    lines.clear();
    attributes.clear();
    string line;
    while (getline(file, line)) {
        // trim() removes surrounding blanks and the '\r' left by CRLF files.
        line = trim(line);
        if (line.empty() || line[0] == '%') {
            continue;
        }
        if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
            // Declared per line so a short declaration cannot inherit the name or
            // type parsed from a previous one.
            string keyword, attribute, type;
            stringstream ss(line);
            ss >> keyword >> attribute >> type;
            attributes.emplace_back(attribute, type);
            continue;
        }
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty())
        throw invalid_argument("No attributes found");
    // Move the class attribute out of the attribute list.
    if (classLast) {
        className = get<0>(attributes.back());
        classType = get<1>(attributes.back());
        attributes.pop_back();
    } else {
        className = get<0>(attributes.front());
        classType = get<1>(attributes.front());
        attributes.erase(attributes.begin());
    }
    generateDataset(classLast);
}
|
||||
void ArffFiles::generateDataset(bool classLast)
|
||||
{
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
/**
 * Returns a copy of `source` without leading and trailing spaces, tabs,
 * carriage returns and line feeds.
 */
string ArffFiles::trim(const string& source)
{
    const auto first = source.find_first_not_of(" \n\r\t");
    if (first == string::npos) {
        // Nothing but whitespace (or nothing at all).
        return string();
    }
    const auto last = source.find_last_not_of(" \n\r\t");
    return source.substr(first, last - first + 1);
}
|
||||
/**
 * Encodes string labels as consecutive integers.
 *
 * Each distinct label receives the next free integer, starting at 0, in order of
 * first appearance.
 *
 * @param labels_t labels to encode
 * @return the integer code of every label, in the same order as the input
 */
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    vector<int> yy;
    yy.reserve(labels_t.size());
    map<string, int> labelMap;
    int next = 0;
    // Iterate by reference: the labels are only read, never modified.
    for (const string& label : labels_t) {
        // emplace inserts only when the label is new; either way it returns the
        // entry, so one lookup replaces the find + two operator[] calls.
        const auto entry = labelMap.emplace(label, next);
        if (entry.second)
            ++next;
        yy.push_back(entry.first->second);
    }
    return yy;
}
|
@@ -1,28 +0,0 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
class ArffFiles {
|
||||
private:
|
||||
vector<string> lines;
|
||||
vector<tuple<string, string>> attributes;
|
||||
string className, classType;
|
||||
vector<vector<float>> X;
|
||||
vector<int> y;
|
||||
void generateDataset(bool);
|
||||
public:
|
||||
ArffFiles();
|
||||
void load(string, bool = true);
|
||||
vector<string> getLines();
|
||||
unsigned long int getSize();
|
||||
string getClassName();
|
||||
string getClassType();
|
||||
string trim(const string&);
|
||||
vector<vector<float>>& getX();
|
||||
vector<int>& getY();
|
||||
vector<tuple<string, string>> getAttributes();
|
||||
vector<int> factorize(const vector<string>& labels_t);
|
||||
};
|
||||
#endif
|
@@ -1,6 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.24)
|
||||
project(main)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
add_executable(main main.cpp ArffFiles.cpp ../../Metrics.cpp ../../CPPFImdlp.cpp)
|
@@ -1,54 +0,0 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "../../CPPFImdlp.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<vector<float>>& X = file.getX();
|
||||
vector<int>& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp();
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
@@ -1,16 +0,0 @@
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
    // Floating-point type used for samples, cut points and cached measures.
    using precision_t = float;
    // Values of one feature.
    using samples_t = std::vector<precision_t>;
    // Integer class label of each sample.
    using labels_t = std::vector<int>;
    // Positions into the sample/label vectors.
    using indices_t = std::vector<size_t>;
    // Thresholds produced by the discretizer.
    using cutPoints_t = std::vector<precision_t>;
    // Cache of entropy values, keyed by a pair of integers.
    using cacheEnt_t = std::map<std::tuple<int, int>, precision_t>;
    // Cache of information-gain values, keyed by a triple of integers.
    using cacheIg_t = std::map<std::tuple<int, int, int>, precision_t>;
}
|
||||
#endif
|
Reference in New Issue
Block a user