From a1f26a257cc8f334d60b5e71e7c9b6599e23862d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?=
Date: Tue, 25 Apr 2023 10:48:59 +0200
Subject: [PATCH] Reformat code and update version

---
 CPPFImdlp.cpp              |  37 +++++---
 CPPFImdlp.h                |  17 +---
 Metrics.cpp                |  27 +++---
 Metrics.h                  |  12 +--
 tests/ArffFiles.cpp        |  35 ++++---
 tests/ArffFiles.h          |  20 +---
 tests/FImdlp_unittest.cpp  | 189 +++++++++++++++++++++----------------
 tests/Metrics_unittest.cpp |  33 ++++---
 8 files changed, 199 insertions(+), 171 deletions(-)

diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp
index 79f9152..92f3353 100644
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@@ -7,16 +7,18 @@ namespace mdlp {
-    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
-                                                                               max_depth(max_depth_),
-                                                                               proposed_cuts(proposed) {
+    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_),
+        max_depth(max_depth_),
+        proposed_cuts(proposed)
+    {
     }
 
     CPPFImdlp::CPPFImdlp() = default;
 
     CPPFImdlp::~CPPFImdlp() = default;
 
-    size_t CPPFImdlp::compute_max_num_cut_points() const {
+    size_t CPPFImdlp::compute_max_num_cut_points() const
+    {
         // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
         if (proposed_cuts == 0) {
             return numeric_limits<size_t>::max();
@@ -29,7 +31,8 @@ namespace mdlp {
         return static_cast<size_t>(proposed_cuts);
     }
 
-    void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
+    void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
+    {
         X = X_;
         y = y_;
         num_cut_points = compute_max_num_cut_points();
@@ -59,7 +62,8 @@ namespace mdlp {
         }
     }
 
-    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
+    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
+    {
         size_t n;
         size_t m;
         size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
@@ -88,10 +92,11 @@ namespace mdlp {
         // Decide which values to use
         cut = cut + (backWall ? m + 1 : -n);
         actual = X[indices[cut]];
-        return {(actual + previous) / 2, cut};
+        return { (actual + previous) / 2, cut };
     }
 
-    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
+    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
+    {
         size_t cut;
         pair<precision_t, size_t> result;
         // Check if the interval length and the depth are Ok
@@ -110,7 +115,8 @@ namespace mdlp {
         }
     }
 
-    size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
+    size_t CPPFImdlp::getCandidate(size_t start, size_t end)
+    {
         /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
         E(A, TA; S) is minimal amongst all the candidate cut points. */
         size_t candidate = numeric_limits<size_t>::max();
@@ -143,7 +149,8 @@ namespace mdlp {
         return candidate;
     }
 
-    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
+    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
+    {
         int k;
         int k1;
         int k2;
@@ -161,13 +168,14 @@ namespace mdlp {
         ent2 = metrics.entropy(cut, end);
         ig = metrics.informationGain(start, cut, end);
         delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
-                                         (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
+            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
         precision_t term = 1 / N * (log2(N - 1) + delta);
         return ig > term;
     }
 
     // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
-    indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) {
+    indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
+    {
         indices_t idx(X_.size());
         iota(idx.begin(), idx.end(), 0);
         stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
@@ -175,11 +183,12 @@ namespace mdlp {
                 return y_[i1] < y_[i2];
             else
                 return X_[i1] < X_[i2];
-        });
+            });
         return idx;
     }
 
-    void CPPFImdlp::resizeCutPoints() {
+    void CPPFImdlp::resizeCutPoints()
+    {
         //Compute entropy of each of the whole cutpoint set and discards the biggest value
         precision_t maxEntropy = 0;
         precision_t entropy;
diff --git a/CPPFImdlp.h b/CPPFImdlp.h
index 29b76c6..b6066c4 100644
--- a/CPPFImdlp.h
+++ b/CPPFImdlp.h
@@ -21,34 +21,23 @@ namespace mdlp {
         cutPoints_t cutPoints;
         size_t num_cut_points = numeric_limits<size_t>::max();
 
-        static indices_t sortIndices(samples_t &, labels_t &);
-
+        static indices_t sortIndices(samples_t&, labels_t&);
         void computeCutPoints(size_t, size_t, int);
-
         void resizeCutPoints();
-
         bool mdlp(size_t, size_t, size_t);
-
         size_t getCandidate(size_t, size_t);
-
         size_t compute_max_num_cut_points() const;
-
         pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
 
     public:
         CPPFImdlp();
-
         CPPFImdlp(size_t, int, float);
-
         ~CPPFImdlp();
-
-        void fit(samples_t &, labels_t &);
-
+        void fit(samples_t&, labels_t&);
         inline cutPoints_t getCutPoints() const { return cutPoints; };
-
         inline int get_depth() const { return depth; };
-
-        static inline string version() { return "1.1.1"; };
+        static inline string version() { return "1.1.2"; };
     };
 }
 #endif
diff --git a/Metrics.cpp b/Metrics.cpp
index c77da01..71a3c07 100644
--- a/Metrics.cpp
+++ b/Metrics.cpp
@@ -4,11 +4,13 @@
 using namespace std;
 
 namespace mdlp {
-    Metrics::Metrics(labels_t &y_, indices_t &indices_) : y(y_), indices(indices_),
-                                                          numClasses(computeNumClasses(0, indices.size())) {
+    Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
+        numClasses(computeNumClasses(0, indices.size()))
+    {
     }
 
-    int Metrics::computeNumClasses(size_t start, size_t end) {
+    int Metrics::computeNumClasses(size_t start, size_t end)
+    {
         set<int> nClasses;
         for (auto i = start; i < end; ++i) {
             nClasses.insert(y[indices[i]]);
@@ -16,7 +18,8 @@ namespace mdlp {
         return static_cast<int>(nClasses.size());
     }
 
-    void Metrics::setData(const labels_t &y_, const indices_t &indices_) {
+    void Metrics::setData(const labels_t& y_, const indices_t& indices_)
+    {
         indices = indices_;
         y = y_;
         numClasses = computeNumClasses(0, indices.size());
@@ -24,21 +27,22 @@ namespace mdlp {
         igCache.clear();
     }
 
-    precision_t Metrics::entropy(size_t start, size_t end) {
+    precision_t Metrics::entropy(size_t start, size_t end)
+    {
         precision_t p;
         precision_t ventropy = 0;
         int nElements = 0;
         labels_t counts(numClasses + 1, 0);
         if (end - start < 2) return 0;
-        if (entropyCache.find({start, end}) != entropyCache.end()) {
+        if (entropyCache.find({ start, end }) != entropyCache.end()) {
             return entropyCache[{start, end}];
         }
         for (auto i = &indices[start]; i != &indices[end]; ++i) {
             counts[y[*i]]++;
             nElements++;
         }
-        for (auto count: counts) {
+        for (auto count : counts) {
             if (count > 0) {
                 p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
                 ventropy -= p * log2(p);
@@ -48,7 +52,8 @@ namespace mdlp {
         return ventropy;
     }
 
-    precision_t Metrics::informationGain(size_t start, size_t cut, size_t end) {
+    precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
+    {
         precision_t iGain;
         precision_t entropyInterval;
         precision_t entropyLeft;
@@ -63,9 +68,9 @@ namespace mdlp {
         entropyLeft = entropy(start, cut);
         entropyRight = entropy(cut, end);
         iGain = entropyInterval -
-                (static_cast<precision_t>(nElementsLeft) * entropyLeft +
-                 static_cast<precision_t>(nElementsRight) * entropyRight) /
-                static_cast<precision_t>(nElements);
+            (static_cast<precision_t>(nElementsLeft) * entropyLeft +
+                static_cast<precision_t>(nElementsRight) * entropyRight) /
+            static_cast<precision_t>(nElements);
         igCache[make_tuple(start, cut, end)] = iGain;
         return iGain;
     }
diff --git a/Metrics.h b/Metrics.h
index 4046a6d..4f8151a 100644
--- a/Metrics.h
+++ b/Metrics.h
@@ -6,20 +6,16 @@
 namespace mdlp {
     class Metrics {
     protected:
-        labels_t &y;
-        indices_t &indices;
+        labels_t& y;
+        indices_t& indices;
         int numClasses;
         cacheEnt_t entropyCache = cacheEnt_t();
         cacheIg_t igCache = cacheIg_t();
    public:
-        Metrics(labels_t &, indices_t &);
-
-        void setData(const labels_t &, const indices_t &);
-
+        Metrics(labels_t&, indices_t&);
+        void setData(const labels_t&, const indices_t&);
         int computeNumClasses(size_t, size_t);
-
         precision_t entropy(size_t, size_t);
-
         precision_t informationGain(size_t, size_t, size_t);
     };
 }
diff --git a/tests/ArffFiles.cpp b/tests/ArffFiles.cpp
index d815000..82f5774 100644
--- a/tests/ArffFiles.cpp
+++ b/tests/ArffFiles.cpp
@@ -7,35 +7,43 @@
 using namespace std;
 
 ArffFiles::ArffFiles() = default;
 
-vector<string> ArffFiles::getLines() const {
+vector<string> ArffFiles::getLines() const
+{
     return lines;
 }
 
-unsigned long int ArffFiles::getSize() const {
+unsigned long int ArffFiles::getSize() const
+{
     return lines.size();
 }
 
-vector<pair<string, string>> ArffFiles::getAttributes() const {
+vector<pair<string, string>> ArffFiles::getAttributes() const
+{
     return attributes;
 }
 
-string ArffFiles::getClassName() const {
+string ArffFiles::getClassName() const
+{
     return className;
 }
 
-string ArffFiles::getClassType() const {
+string ArffFiles::getClassType() const
+{
     return classType;
 }
 
-vector<mdlp::samples_t> &ArffFiles::getX() {
+vector<mdlp::samples_t>& ArffFiles::getX()
+{
     return X;
 }
 
-vector<int> &ArffFiles::getY() {
+vector<int>& ArffFiles::getY()
+{
     return y;
 }
 
-void ArffFiles::load(const string &fileName, bool classLast) {
+void ArffFiles::load(const string& fileName, bool classLast)
+{
     ifstream file(fileName);
     if (!file.is_open()) {
         throw invalid_argument("Unable to open file");
@@ -79,7 +87,8 @@ void ArffFiles::load(const string &fileName, bool classLast) {
 }
 
-void ArffFiles::generateDataset(bool classLast) {
+void ArffFiles::generateDataset(bool classLast)
+{
     X = vector<mdlp::samples_t>(attributes.size(), mdlp::samples_t(lines.size()));
     auto yy = vector<string>(lines.size(), "");
     int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
@@ -99,19 +108,21 @@ void ArffFiles::generateDataset(bool classLast) {
     y = factorize(yy);
 }
 
-string ArffFiles::trim(const string &source) {
+string ArffFiles::trim(const string& source)
+{
     string s(source);
     s.erase(0, s.find_first_not_of(" \n\r\t"));
     s.erase(s.find_last_not_of(" \n\r\t") + 1);
     return s;
 }
 
-vector<int> ArffFiles::factorize(const vector<string> &labels_t) {
+vector<int> ArffFiles::factorize(const vector<string>& labels_t)
+{
     vector<int> yy;
     yy.reserve(labels_t.size());
     map<string, int> labelMap;
     int i = 0;
-    for (const string &label: labels_t) {
+    for (const string& label : labels_t) {
         if (labelMap.find(label) == labelMap.end()) {
             labelMap[label] = i++;
         }
diff --git a/tests/ArffFiles.h b/tests/ArffFiles.h
index 9b0aa2b..f36d9d3 100644
--- a/tests/ArffFiles.h
+++ b/tests/ArffFiles.h
@@ -20,26 +20,16 @@ private:
 public:
     ArffFiles();
-
-    void load(const string &, bool = true);
-
+    void load(const string&, bool = true);
     vector<string> getLines() const;
-
     unsigned long int getSize() const;
-
     string getClassName() const;
-
     string getClassType() const;
-
-    static string trim(const string &);
-
-    vector<mdlp::samples_t> &getX();
-
-    vector<int> &getY();
-
+    static string trim(const string&);
+    vector<mdlp::samples_t>& getX();
+    vector<int>& getY();
     vector<pair<string, string>> getAttributes() const;
-
-    static vector<int> factorize(const vector<string> &labels_t);
+    static vector<int> factorize(const vector<string>& labels_t);
 };
 #endif
\ No newline at end of file
diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp
index d0da40e..fece4bd 100644
--- a/tests/FImdlp_unittest.cpp
+++ b/tests/FImdlp_unittest.cpp
@@ -15,23 +15,25 @@ throw; \
     , etype)
 
 namespace mdlp {
-    class TestFImdlp : public CPPFImdlp, public testing::Test {
+    class TestFImdlp: public CPPFImdlp, public testing::Test {
     public:
         precision_t precision = 0.000001f;
 
-        TestFImdlp() : CPPFImdlp() {}
+        TestFImdlp(): CPPFImdlp() {}
 
         string data_path;
 
-        void SetUp() override {
-            X = {4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
-                 6.0f, 5.1f, 5.9f};
-            y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
+        void SetUp() override
+        {
+            X = { 4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
+                6.0f, 5.1f, 5.9f };
+            y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
             fit(X, y);
             data_path = set_data_path();
         }
 
-        static string set_data_path() {
+        static string set_data_path()
+        {
             string path = "../datasets/";
             ifstream file(path + "iris.arff");
             if (file.is_open()) {
@@ -41,7 +43,8 @@ namespace mdlp {
             return "../../tests/datasets/";
         }
 
-        void checkSortedVector() {
+        void checkSortedVector()
+        {
             indices_t testSortedIndices = sortIndices(X, y);
             precision_t prev = X[testSortedIndices[0]];
             for (unsigned long i = 0; i < X.size(); ++i) {
@@ -51,7 +54,8 @@ namespace mdlp {
             }
         }
 
-        void checkCutPoints(cutPoints_t &computed, cutPoints_t &expected) const {
+        void checkCutPoints(cutPoints_t& computed, cutPoints_t& expected) const
+        {
             EXPECT_EQ(computed.size(), expected.size());
             for (unsigned long i = 0; i < computed.size(); i++) {
                 cout << "(" << computed[i] << ", " << expected[i] << ") ";
@@ -59,9 +63,10 @@ namespace mdlp {
             }
         }
 
-        bool test_result(const samples_t &X_, size_t cut, float midPoint, size_t limit, const string &title) {
+        bool test_result(const samples_t& X_, size_t cut, float midPoint, size_t limit, const string& title)
+        {
             pair<precision_t, size_t> result;
-            labels_t y_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+            labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
             X = X_;
             y = y_;
             indices = sortIndices(X, y);
@@ -72,12 +77,13 @@ namespace mdlp {
             return true;
         }
 
-        void test_dataset(CPPFImdlp &test, const string &filename, vector<cutPoints_t> &expected,
-                          vector<int> &depths) const {
+        void test_dataset(CPPFImdlp& test, const string& filename, vector<cutPoints_t>& expected,
+            vector<int>& depths) const
+        {
             ArffFiles file;
             file.load(data_path + filename + ".arff", true);
-            vector<samples_t> &X = file.getX();
-            labels_t &y = file.getY();
+            vector<samples_t>& X = file.getX();
+            labels_t& y = file.getY();
             auto attributes = file.getAttributes();
             for (auto feature = 0; feature < attributes.size(); feature++) {
                 test.fit(X[feature], y);
@@ -90,92 +96,100 @@ namespace mdlp {
         }
     };
 
-    TEST_F(TestFImdlp, FitErrorEmptyDataset) {
+    TEST_F(TestFImdlp, FitErrorEmptyDataset)
+    {
         X = samples_t();
         y = labels_t();
         EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have at least one element");
     }
 
-    TEST_F(TestFImdlp, FitErrorDifferentSize) {
-        X = {1, 2, 3};
-        y = {1, 2};
+    TEST_F(TestFImdlp, FitErrorDifferentSize)
+    {
+        X = { 1, 2, 3 };
+        y = { 1, 2 };
         EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size");
     }
 
-    TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth) {
+    TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth)
+    {
         auto testLength = CPPFImdlp(2, 10, 0);
         auto testDepth = CPPFImdlp(3, 0, 0);
-        X = {1, 2, 3};
-        y = {1, 2, 3};
+        X = { 1, 2, 3 };
+        y = { 1, 2, 3 };
         EXPECT_THROW_WITH_MESSAGE(testLength.fit(X, y), invalid_argument, "min_length must be greater than 2");
         EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
     }
 
-    TEST_F(TestFImdlp, JoinFit) {
-        samples_t X_ = {1, 2, 2, 3, 4, 2, 3};
-        labels_t y_ = {0, 0, 1, 2, 3, 4, 5};
-        cutPoints_t expected = {1.5f, 2.5f};
+    TEST_F(TestFImdlp, JoinFit)
+    {
+        samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
+        labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
+        cutPoints_t expected = { 1.5f, 2.5f };
         fit(X_, y_);
         auto computed = getCutPoints();
         EXPECT_EQ(computed.size(), expected.size());
         checkCutPoints(computed, expected);
     }
 
-    TEST_F(TestFImdlp, FitErrorMaxCutPoints) {
+    TEST_F(TestFImdlp, FitErrorMaxCutPoints)
+    {
         auto testmin = CPPFImdlp(2, 10, -1);
         auto testmax = CPPFImdlp(3, 0, 200);
-        X = {1, 2, 3};
-        y = {1, 2, 3};
+        X = { 1, 2, 3 };
+        y = { 1, 2, 3 };
         EXPECT_THROW_WITH_MESSAGE(testmin.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
         EXPECT_THROW_WITH_MESSAGE(testmax.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
     }
 
-    TEST_F(TestFImdlp, SortIndices) {
-        X = {5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f};
-        y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
-        indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7};
+    TEST_F(TestFImdlp, SortIndices)
+    {
+        X = { 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f };
+        y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
+        indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
         checkSortedVector();
-        X = {5.77f, 5.88f, 5.99f};
-        y = {1, 2, 1};
-        indices = {0, 1, 2};
+        X = { 5.77f, 5.88f, 5.99f };
+        y = { 1, 2, 1 };
+        indices = { 0, 1, 2 };
         checkSortedVector();
-        X = {5.33f, 5.22f, 5.11f};
-        y = {1, 2, 1};
-        indices = {2, 1, 0};
+        X = { 5.33f, 5.22f, 5.11f };
+        y = { 1, 2, 1 };
+        indices = { 2, 1, 0 };
         checkSortedVector();
-        X = {5.33f, 5.22f, 5.33f};
-        y = {2, 2, 1};
-        indices = {1, 2, 0};
+        X = { 5.33f, 5.22f, 5.33f };
+        y = { 2, 2, 1 };
+        indices = { 1, 2, 0 };
     }
 
-    TEST_F(TestFImdlp, TestShortDatasets) {
+    TEST_F(TestFImdlp, TestShortDatasets)
+    {
         vector<precision_t> computed;
-        X = {1};
-        y = {1};
+        X = { 1 };
+        y = { 1 };
         fit(X, y);
         computed = getCutPoints();
         EXPECT_EQ(computed.size(), 0);
-        X = {1, 3};
-        y = {1, 2};
+        X = { 1, 3 };
+        y = { 1, 2 };
         fit(X, y);
         computed = getCutPoints();
         EXPECT_EQ(computed.size(), 0);
-        X = {2, 4};
-        y = {1, 2};
+        X = { 2, 4 };
+        y = { 1, 2 };
         fit(X, y);
         computed = getCutPoints();
         EXPECT_EQ(computed.size(), 0);
-        X = {1, 2, 3};
-        y = {1, 2, 2};
+        X = { 1, 2, 3 };
+        y = { 1, 2, 2 };
         fit(X, y);
         computed = getCutPoints();
         EXPECT_EQ(computed.size(), 1);
         EXPECT_NEAR(computed[0], 1.5, precision);
     }
 
-    TEST_F(TestFImdlp, TestArtificialDataset) {
+    TEST_F(TestFImdlp, TestArtificialDataset)
+    {
         fit(X, y);
-        cutPoints_t expected = {5.05f};
+        cutPoints_t expected = { 5.05f };
         vector<precision_t> computed = getCutPoints();
         EXPECT_EQ(computed.size(), expected.size());
         for (unsigned long i = 0; i < computed.size(); i++) {
@@ -183,49 +197,53 @@ namespace mdlp {
         }
     }
 
-    TEST_F(TestFImdlp, TestIris) {
+    TEST_F(TestFImdlp, TestIris)
+    {
         vector<cutPoints_t> expected = {
             {5.45f, 5.75f},
             {2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
             {2.45f, 4.75f, 5.05f},
             {0.8f, 1.75f}
         };
-        vector<int> depths = {3, 5, 4, 3};
+        vector<int> depths = { 3, 5, 4, 3 };
         auto test = CPPFImdlp();
         test_dataset(test, "iris", expected, depths);
     }
 
-    TEST_F(TestFImdlp, ComputeCutPointsGCase) {
+    TEST_F(TestFImdlp, ComputeCutPointsGCase)
+    {
         cutPoints_t expected;
-        expected = {1.5};
-        samples_t X_ = {0, 1, 2, 2, 2};
-        labels_t y_ = {1, 1, 1, 2, 2};
+        expected = { 1.5 };
+        samples_t X_ = { 0, 1, 2, 2, 2 };
+        labels_t y_ = { 1, 1, 1, 2, 2 };
         fit(X_, y_);
         auto computed = getCutPoints();
         checkCutPoints(computed, expected);
     }
 
-    TEST_F(TestFImdlp, ValueCutPoint) {
+    TEST_F(TestFImdlp, ValueCutPoint)
+    {
         // Case titles as stated in the doc
-        samples_t X1a{3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f};
+        samples_t X1a{ 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f };
         test_result(X1a, 6, 7.3f / 2, 6, "1a");
-        samples_t X2a = {3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
+        samples_t X2a = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
         test_result(X2a, 6, 7.1f / 2, 4, "2a");
-        samples_t X2b = {3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
+        samples_t X2b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
         test_result(X2b, 6, 7.5f / 2, 7, "2b");
-        samples_t X3a = {3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
+        samples_t X3a = { 3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
         test_result(X3a, 4, 7.1f / 2, 4, "3a");
-        samples_t X3b = {3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f};
+        samples_t X3b = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
         test_result(X3b, 4, 7.1f / 2, 4, "3b");
-        samples_t X4a = {3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f};
+        samples_t X4a = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f };
         test_result(X4a, 4, 6.9f / 2, 2, "4a");
-        samples_t X4b = {3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f};
+        samples_t X4b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
         test_result(X4b, 4, 7.5f / 2, 7, "4b");
-        samples_t X4c = {3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f};
+        samples_t X4c = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
         test_result(X4c, 4, 6.9f / 2, 2, "4c");
     }
 
-    TEST_F(TestFImdlp, MaxDepth) {
+    TEST_F(TestFImdlp, MaxDepth)
+    {
         // Set max_depth to 1
         auto test = CPPFImdlp(3, 1, 0);
         vector<cutPoints_t> expected = {
@@ -234,11 +252,12 @@ namespace mdlp {
             {2.45f},
             {0.8f}
         };
-        vector<int> depths = {1, 1, 1, 1};
+        vector<int> depths = { 1, 1, 1, 1 };
         test_dataset(test, "iris", expected, depths);
     }
 
-    TEST_F(TestFImdlp, MinLength) {
+    TEST_F(TestFImdlp, MinLength)
+    {
         auto test = CPPFImdlp(75, 100, 0);
         // Set min_length to 75
         vector<cutPoints_t> expected = {
@@ -247,11 +266,12 @@ namespace mdlp {
             {2.45f, 4.75f},
             {0.8f, 1.75f}
         };
-        vector<int> depths = {3, 2, 2, 2};
+        vector<int> depths = { 3, 2, 2, 2 };
         test_dataset(test, "iris", expected, depths);
     }
 
-    TEST_F(TestFImdlp, MinLengthMaxDepth) {
+    TEST_F(TestFImdlp, MinLengthMaxDepth)
+    {
         // Set min_length to 75
         auto test = CPPFImdlp(75, 2, 0);
         vector<cutPoints_t> expected = {
@@ -260,11 +280,12 @@ namespace mdlp {
             {2.45f, 4.75f},
             {0.8f, 1.75f}
         };
-        vector<int> depths = {2, 2, 2, 2};
+        vector<int> depths = { 2, 2, 2, 2 };
         test_dataset(test, "iris", expected, depths);
     }
 
-    TEST_F(TestFImdlp, MaxCutPointsInteger) {
+    TEST_F(TestFImdlp, MaxCutPointsInteger)
+    {
         // Set min_length to 75
         auto test = CPPFImdlp(75, 2, 1);
         vector<cutPoints_t> expected = {
@@ -273,12 +294,13 @@ namespace mdlp {
             {2.45f},
             {0.8f}
         };
-        vector<int> depths = {2, 2, 2, 2};
+        vector<int> depths = { 2, 2, 2, 2 };
         test_dataset(test, "iris", expected, depths);
-        
+
     }
 
-    TEST_F(TestFImdlp, MaxCutPointsFloat) {
+    TEST_F(TestFImdlp, MaxCutPointsFloat)
+    {
         // Set min_length to 75
         auto test = CPPFImdlp(75, 2, 0.2f);
         vector<cutPoints_t> expected = {
@@ -287,19 +309,20 @@ namespace mdlp {
             {2.45f, 4.75f},
             {0.8f, 1.75f}
         };
-        vector<int> depths = {2, 2, 2, 2};
+        vector<int> depths = { 2, 2, 2, 2 };
         test_dataset(test, "iris", expected, depths);
     }
 
-    TEST_F(TestFImdlp, ProposedCuts) {
-        vector<pair<float, size_t>> proposed_list = {{0.1f, 2},
+    TEST_F(TestFImdlp, ProposedCuts)
+    {
+        vector<pair<float, size_t>> proposed_list = { {0.1f, 2},
                                                      {0.5f, 10},
                                                      {0.07f, 1},
                                                      {1.0f, 1},
-                                                     {2.0f, 2}};
+                                                     {2.0f, 2} };
         size_t expected;
         size_t computed;
-        for (auto proposed_item: proposed_list) {
+        for (auto proposed_item : proposed_list) {
             tie(proposed_cuts, expected) = proposed_item;
             computed = compute_max_num_cut_points();
             ASSERT_EQ(expected, computed);
diff --git a/tests/Metrics_unittest.cpp b/tests/Metrics_unittest.cpp
index d8ee8db..989d18b 100644
--- a/tests/Metrics_unittest.cpp
+++ b/tests/Metrics_unittest.cpp
@@ -2,46 +2,51 @@
 #include "../Metrics.h"
 
 namespace mdlp {
-    class TestMetrics : public Metrics, public testing::Test {
+    class TestMetrics: public Metrics, public testing::Test {
     public:
-        labels_t y_ = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
-        indices_t indices_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+        labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
+        indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
         precision_t precision = 0.000001f;
 
-        TestMetrics() : Metrics(y_, indices_) {};
+        TestMetrics(): Metrics(y_, indices_) {};
 
-        void SetUp() override {
+        void SetUp() override
+        {
             setData(y_, indices_);
         }
     };
 
-    TEST_F(TestMetrics, NumClasses) {
-        y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
+    TEST_F(TestMetrics, NumClasses)
+    {
+        y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
         EXPECT_EQ(1, computeNumClasses(4, 8));
         EXPECT_EQ(2, computeNumClasses(0, 10));
         EXPECT_EQ(2, computeNumClasses(8, 10));
     }
 
-    TEST_F(TestMetrics, Entropy) {
+    TEST_F(TestMetrics, Entropy)
+    {
         EXPECT_EQ(1, entropy(0, 10));
         EXPECT_EQ(0, entropy(0, 5));
-        y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
+        y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
         setData(y, indices);
         ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
     }
 
-    TEST_F(TestMetrics, EntropyDouble) {
-        y = {0, 0, 1, 2, 3};
-        samples_t expected_entropies = {0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898};
+    TEST_F(TestMetrics, EntropyDouble)
+    {
+        y = { 0, 0, 1, 2, 3 };
+        samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898 };
         for (auto idx = 0; idx < y.size(); ++idx) {
             ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
         }
     }
 
-    TEST_F(TestMetrics, InformationGain) {
+    TEST_F(TestMetrics, InformationGain)
+    {
         ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
         ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
-        y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
+        y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
         setData(y, indices);
         ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
     }