From 7368dd9ff4d40d089c69c2a82cd307523da3682d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 20 Mar 2023 17:45:58 +0100 Subject: [PATCH] Refactor ArffFiles in main project --- src/fimdlp/ArffFiles.cpp | 128 ++++++++++++++++++++------------------- src/fimdlp/ArffFiles.h | 39 ++++++++---- 2 files changed, 95 insertions(+), 72 deletions(-) diff --git a/src/fimdlp/ArffFiles.cpp b/src/fimdlp/ArffFiles.cpp index 4fbca78..405a57e 100644 --- a/src/fimdlp/ArffFiles.cpp +++ b/src/fimdlp/ArffFiles.cpp @@ -2,86 +2,92 @@ #include #include #include -#include using namespace std; -ArffFiles::ArffFiles() -{ -} -vector ArffFiles::getLines() -{ +ArffFiles::ArffFiles() = default; + +vector ArffFiles::getLines() const { return lines; } -unsigned long int ArffFiles::getSize() -{ + +unsigned long int ArffFiles::getSize() const { return lines.size(); } -vector> ArffFiles::getAttributes() -{ + +vector> ArffFiles::getAttributes() const { return attributes; } -string ArffFiles::getClassName() -{ + +string ArffFiles::getClassName() const { return className; } -string ArffFiles::getClassType() -{ + +string ArffFiles::getClassType() const { return classType; } -vector>& ArffFiles::getX() -{ + +vector> &ArffFiles::getX() { return X; } -vector& ArffFiles::getY() -{ + +vector &ArffFiles::getY() { return y; } -void ArffFiles::load(string fileName, bool classLast) -{ + +void ArffFiles::load(const string &fileName, bool classLast) { ifstream file(fileName); - if (file.is_open()) { - string line, keyword, attribute, type; - while (getline(file, line)) { - if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { - continue; - } - if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { - stringstream ss(line); - ss >> keyword >> attribute >> type; - attributes.push_back({ attribute, type }); - continue; - } - if (line[0] == '@') { - continue; - } - lines.push_back(line); - } - file.close(); - if (attributes.empty()) - throw invalid_argument("No attributes found"); - if (classLast) { - className = get<0>(attributes.back()); - classType = get<1>(attributes.back()); - attributes.pop_back(); - } else { - className = get<0>(attributes.front()); - classType = get<1>(attributes.front()); - attributes.erase(attributes.begin()); - } - generateDataset(classLast); - } else + if (!file.is_open()) { throw invalid_argument("Unable to open file"); + } + string line; + string keyword; + string attribute; + string type; + string type_w; + while (getline(file, line)) { + if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { + continue; + } + if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { + stringstream ss(line); + ss >> keyword >> attribute; + type = ""; + while (ss >> type_w) + type += type_w + " "; + attributes.emplace_back(attribute, type); + continue; + } + if (line[0] == '@') { + continue; + } + lines.push_back(line); + } + file.close(); + if (attributes.empty()) + throw invalid_argument("No attributes found"); + if (classLast) { + className = get<0>(attributes.back()); + classType = get<1>(attributes.back()); + attributes.pop_back(); + } else { + className = get<0>(attributes.front()); + classType = get<1>(attributes.front()); + attributes.erase(attributes.begin()); + } + generateDataset(classLast); + } -void ArffFiles::generateDataset(bool classLast) -{ + +void ArffFiles::generateDataset(bool classLast) { X = vector>(attributes.size(), vector(lines.size())); - vector yy = vector(lines.size(), ""); - int labelIndex = classLast ? attributes.size() : 0; + auto yy = vector(lines.size(), ""); + int labelIndex = classLast ? static_cast(attributes.size()) : 0; for (size_t i = 0; i < lines.size(); i++) { stringstream ss(lines[i]); string value; - int pos = 0, xIndex = 0; + int pos = 0; + int xIndex = 0; while (getline(ss, value, ',')) { if (pos++ == labelIndex) { yy[i] = value; @@ -92,20 +98,20 @@ void ArffFiles::generateDataset(bool classLast) } y = factorize(yy); } -string ArffFiles::trim(const string& source) -{ + +string ArffFiles::trim(const string &source) { string s(source); s.erase(0, s.find_first_not_of(" \n\r\t")); s.erase(s.find_last_not_of(" \n\r\t") + 1); return s; } -vector ArffFiles::factorize(const vector& labels_t) -{ + +vector ArffFiles::factorize(const vector &labels_t) { vector yy; yy.reserve(labels_t.size()); map labelMap; int i = 0; - for (string label : labels_t) { + for (const string &label: labels_t) { if (labelMap.find(label) == labelMap.end()) { labelMap[label] = i++; } diff --git a/src/fimdlp/ArffFiles.h b/src/fimdlp/ArffFiles.h index b56d28d..38531af 100644 --- a/src/fimdlp/ArffFiles.h +++ b/src/fimdlp/ArffFiles.h @@ -1,27 +1,44 @@ #ifndef ARFFFILES_H #define ARFFFILES_H + #include #include + using namespace std; + class ArffFiles { private: vector lines; vector> attributes; - string className, classType; + string className; + string classType; vector> X; vector y; + void generateDataset(bool); + public: ArffFiles(); - void load(string, bool = true); - vector getLines(); - unsigned long int getSize(); - string getClassName(); - string getClassType(); - string trim(const string&); - vector>& getX(); - vector& getY(); - vector> getAttributes(); - vector factorize(const vector& labels_t); + + void load(const string &, bool = true); + + vector getLines() const; + + unsigned long int getSize() const; + + string getClassName() const; + + string getClassType() const; + + static string trim(const string &); + + vector> &getX(); + + vector &getY(); + + vector> getAttributes() const; + + static vector factorize(const vector &labels_t); }; + #endif \ No newline at end of file