Enhance code

This commit is contained in:
2022-11-28 23:22:03 +01:00
parent ad835c9218
commit 182c949713
5 changed files with 23 additions and 31 deletions

14
debug.cpp Normal file
View File

@@ -0,0 +1,14 @@
std::cout << "+++++++++++++++++++++++" << std::endl;
for (size_t i = 0; i < y.size(); i++)
{
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
std::cout << "Information Gain:" << std::endl;
auto nc = Metrics::numClasses(y, indices, 0, indices.size());
for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint)
{
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
}

View File

@@ -53,20 +53,6 @@ namespace CPPFImdlp
yPrev = y.at(*index); yPrev = y.at(*index);
idxPrev = *index; idxPrev = *index;
} }
std::cout << "Information Gain:" << std::endl;
auto nc = Metrics::numClasses(y, indices, 0, indices.size());
for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint)
{
std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
// << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
}
std::cout << "+++++++++++++++++++++++" << std::endl;
for (size_t i = 0; i < y.size(); i++)
{
printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
}
std::cout << "+++++++++++++++++++++++" << std::endl;
return cutPts; return cutPts;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes

View File

@@ -62,7 +62,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
self.X_ = X self.X_ = X
self.y_ = y self.y_ = y
self.discretizer_ = CFImdlp() self.discretizer_ = CFImdlp()
return self return self
def transform(self, X): def transform(self, X):
@@ -92,7 +91,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
raise ValueError( raise ValueError(
"Shape of input is different from what was seen in `fit`" "Shape of input is different from what was seen in `fit`"
) )
print("Cut points for each feature in Iris dataset:") print("Calculating cut points in python for first feature")
yz = self.y_.copy() yz = self.y_.copy()
xz = X[:, 0].copy() xz = X[:, 0].copy()
xz = xz[np.argsort(X[:, 0])] xz = xz[np.argsort(X[:, 0])]
@@ -102,14 +101,12 @@ class FImdlp(TransformerMixin, BaseEstimator):
if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]: if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]:
print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})") print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
cuts.append((xz[i] + xz[i - 1]) / 2) cuts.append((xz[i] + xz[i - 1]) / 2)
for i in range(0, 1): # self.n_features_): print("Cuts calculados en python: ", cuts)
datax = np.sort(X[:, i]) print("Cuts calculados en C++")
Xcutpoints = self.discretizer_.cut_points(datax, self.y_) print("Cut points for each feature in Iris dataset:")
for i in range(0, self.n_features_):
datax = self.X_[np.argsort(self.X_[:, i]), i]
y_ = self.y_[np.argsort(self.X_[:, i])]
Xcutpoints = self.discretizer_.cut_points(datax, y_)
print(f"{self.features_[i]:20s}: {Xcutpoints}") print(f"{self.features_[i]:20s}: {Xcutpoints}")
print("Solución cut_points: ", cuts)
print(xz)
print("***********")
for i in range(0, len(yz)):
print(f"({xz[i]}, {yz[i]})")
print("***********")
return X return X

View File

@@ -7,9 +7,4 @@ X = data.data
y = data.target y = data.target
features = data.feature_names features = data.feature_names
test = FImdlp() test = FImdlp()
Xcutpoints = test.fit(X, y, features=features).transform(X) test.fit(X, y, features=features).transform(X)
clf = CFImdlp(debug=True)
print("Cut points for feature 0 in Iris dataset:")
print(clf.cut_points(X[:, 0], y))
print("Xcut")
print(Xcutpoints)