Refactor ArffFiles in main project

This commit is contained in:
2023-03-20 17:45:58 +01:00
parent b5c6a49e19
commit 7368dd9ff4
2 changed files with 95 additions and 72 deletions

View File

@@ -2,86 +2,92 @@
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <map> #include <map>
#include <iostream>
using namespace std; using namespace std;
ArffFiles::ArffFiles() ArffFiles::ArffFiles() = default;
{
} vector<string> ArffFiles::getLines() const {
vector<string> ArffFiles::getLines()
{
return lines; return lines;
} }
unsigned long int ArffFiles::getSize()
{ unsigned long int ArffFiles::getSize() const {
return lines.size(); return lines.size();
} }
vector<pair<string, string>> ArffFiles::getAttributes()
{ vector<pair<string, string>> ArffFiles::getAttributes() const {
return attributes; return attributes;
} }
string ArffFiles::getClassName()
{ string ArffFiles::getClassName() const {
return className; return className;
} }
string ArffFiles::getClassType()
{ string ArffFiles::getClassType() const {
return classType; return classType;
} }
vector<vector<float>>& ArffFiles::getX()
{ vector<vector<float>> &ArffFiles::getX() {
return X; return X;
} }
vector<int>& ArffFiles::getY()
{ vector<int> &ArffFiles::getY() {
return y; return y;
} }
void ArffFiles::load(string fileName, bool classLast)
{ void ArffFiles::load(const string &fileName, bool classLast) {
ifstream file(fileName); ifstream file(fileName);
if (file.is_open()) { if (!file.is_open()) {
string line, keyword, attribute, type;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
ss >> keyword >> attribute >> type;
attributes.push_back({ attribute, type });
continue;
}
if (line[0] == '@') {
continue;
}
lines.push_back(line);
}
file.close();
if (attributes.empty())
throw invalid_argument("No attributes found");
if (classLast) {
className = get<0>(attributes.back());
classType = get<1>(attributes.back());
attributes.pop_back();
} else {
className = get<0>(attributes.front());
classType = get<1>(attributes.front());
attributes.erase(attributes.begin());
}
generateDataset(classLast);
} else
throw invalid_argument("Unable to open file"); throw invalid_argument("Unable to open file");
}
string line;
string keyword;
string attribute;
string type;
string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
type += type_w + " ";
attributes.emplace_back(attribute, type);
continue;
}
if (line[0] == '@') {
continue;
}
lines.push_back(line);
}
file.close();
if (attributes.empty())
throw invalid_argument("No attributes found");
if (classLast) {
className = get<0>(attributes.back());
classType = get<1>(attributes.back());
attributes.pop_back();
} else {
className = get<0>(attributes.front());
classType = get<1>(attributes.front());
attributes.erase(attributes.begin());
}
generateDataset(classLast);
} }
void ArffFiles::generateDataset(bool classLast)
{ void ArffFiles::generateDataset(bool classLast) {
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size())); X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
vector<string> yy = vector<string>(lines.size(), ""); auto yy = vector<string>(lines.size(), "");
int labelIndex = classLast ? attributes.size() : 0; int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
for (size_t i = 0; i < lines.size(); i++) { for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]); stringstream ss(lines[i]);
string value; string value;
int pos = 0, xIndex = 0; int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) { while (getline(ss, value, ',')) {
if (pos++ == labelIndex) { if (pos++ == labelIndex) {
yy[i] = value; yy[i] = value;
@@ -92,20 +98,20 @@ void ArffFiles::generateDataset(bool classLast)
} }
y = factorize(yy); y = factorize(yy);
} }
string ArffFiles::trim(const string& source)
{ string ArffFiles::trim(const string &source) {
string s(source); string s(source);
s.erase(0, s.find_first_not_of(" \n\r\t")); s.erase(0, s.find_first_not_of(" \n\r\t"));
s.erase(s.find_last_not_of(" \n\r\t") + 1); s.erase(s.find_last_not_of(" \n\r\t") + 1);
return s; return s;
} }
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{ vector<int> ArffFiles::factorize(const vector<string> &labels_t) {
vector<int> yy; vector<int> yy;
yy.reserve(labels_t.size()); yy.reserve(labels_t.size());
map<string, int> labelMap; map<string, int> labelMap;
int i = 0; int i = 0;
for (string label : labels_t) { for (const string &label: labels_t) {
if (labelMap.find(label) == labelMap.end()) { if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++; labelMap[label] = i++;
} }

View File

@@ -1,27 +1,44 @@
#ifndef ARFFFILES_H #ifndef ARFFFILES_H
#define ARFFFILES_H #define ARFFFILES_H
#include <string> #include <string>
#include <vector> #include <vector>
using namespace std; using namespace std;
class ArffFiles { class ArffFiles {
private: private:
vector<string> lines; vector<string> lines;
vector<pair<string, string>> attributes; vector<pair<string, string>> attributes;
string className, classType; string className;
string classType;
vector<vector<float>> X; vector<vector<float>> X;
vector<int> y; vector<int> y;
void generateDataset(bool); void generateDataset(bool);
public: public:
ArffFiles(); ArffFiles();
void load(string, bool = true);
vector<string> getLines(); void load(const string &, bool = true);
unsigned long int getSize();
string getClassName(); vector<string> getLines() const;
string getClassType();
string trim(const string&); unsigned long int getSize() const;
vector<vector<float>>& getX();
vector<int>& getY(); string getClassName() const;
vector<pair<string, string>> getAttributes();
vector<int> factorize(const vector<string>& labels_t); string getClassType() const;
static string trim(const string &);
vector<vector<float>> &getX();
vector<int> &getY();
vector<pair<string, string>> getAttributes() const;
static vector<int> factorize(const vector<string> &labels_t);
}; };
#endif #endif