#include "ArffFiles.h"

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace std;

/// Creates an empty reader; call load() to populate it.
ArffFiles::ArffFiles()
{
}

/// Returns a copy of the raw data rows (comment, blank and '@' lines excluded).
vector<string> ArffFiles::getLines()
{
    return lines;
}

/// Returns the number of data rows read by the last load().
unsigned long int ArffFiles::getSize()
{
    return lines.size();
}

/// Returns a copy of the (name, type) entries of the feature attributes.
/// The class attribute is not included: load() removes it from the list.
/// The return type is spelled with decltype so that this definition always
/// matches the container type declared for `attributes` in ArffFiles.h.
auto ArffFiles::getAttributes() -> decltype(attributes)
{
    return attributes;
}

/// Returns the name of the class attribute selected by load().
string ArffFiles::getClassName()
{
    return className;
}

/// Returns the declared type token of the class attribute selected by load().
string ArffFiles::getClassType()
{
    return classType;
}

/// Returns the feature matrix, indexed as X[feature][sample].
vector<vector<float>>& ArffFiles::getX()
{
    return X;
}

/// Returns the integer-encoded class labels, one per data row.
vector<int>& ArffFiles::getY()
{
    return y;
}

/// Reads an ARFF file and builds the dataset (X, y, attributes, class info).
///
/// @param fileName  path of the file to read.
/// @param classLast true if the class is the last attribute/column,
///                  false if it is the first one.
/// @throws invalid_argument if the file cannot be opened, if it declares no
///         attributes, or if a data row cannot be converted (see
///         generateDataset()).
///
/// NOTE: attribute declarations are split on whitespace, so only the first
/// whitespace-delimited token after the name is kept as the type, and quoted
/// names containing spaces are not supported.
void ArffFiles::load(string fileName, bool classLast)
{
    ifstream file(fileName);
    if (!file.is_open()) {
        throw invalid_argument("Unable to open file");
    }

    // Start from a clean slate so that reusing the object for a second file
    // does not mix the attributes and rows of both files.
    lines.clear();
    attributes.clear();
    className.clear();
    classType.clear();
    X.clear();
    y.clear();

    string line;
    while (getline(file, line)) {
        // Classify the line on its trimmed form so that CRLF endings,
        // indentation and whitespace-only lines are handled uniformly.
        const string stripped = trim(line);
        if (stripped.empty() || stripped[0] == '%') {
            continue;
        }
        if (stripped[0] == '@') {
            // Locals are per-line so an incomplete declaration cannot inherit
            // tokens from the previous one.
            string keyword, attribute, type;
            stringstream ss(stripped);
            ss >> keyword >> attribute >> type;
            // ARFF keywords are case-insensitive.
            transform(keyword.begin(), keyword.end(), keyword.begin(),
                      [](unsigned char c) { return static_cast<char>(tolower(c)); });
            if (keyword == "@attribute") {
                attributes.emplace_back(attribute, type);
            }
            // Any other '@' directive (@relation, @data, ...) is ignored.
            continue;
        }
        lines.push_back(line);
    }

    if (attributes.empty()) {
        throw invalid_argument("No attributes found");
    }

    // Pull the class attribute out of the list; what remains are the features.
    const auto classPosition = classLast ? attributes.end() - 1 : attributes.begin();
    className = get<0>(*classPosition);
    classType = get<1>(*classPosition);
    attributes.erase(classPosition);

    generateDataset(classLast);
}

/// Converts the stored data rows into the feature matrix X and label vector y.
///
/// Every comma-separated value of a row is parsed with stof() into
/// X[feature][sample], except the value in the class column, which is
/// trimmed and later encoded by factorize().
///
/// @param classLast true if the class is the last column, false if the first.
/// @throws invalid_argument if a row holds more values than declared
///         attributes, or (from stof) if a feature value is not numeric.
/// @throws out_of_range (from stof) if a feature value does not fit a float.
void ArffFiles::generateDataset(bool classLast)
{
    const size_t numFeatures = attributes.size();
    const size_t numSamples = lines.size();
    X.assign(numFeatures, vector<float>(numSamples));
    vector<string> labels(numSamples);

    // The class attribute has already been removed from `attributes`, so the
    // last column index equals the number of remaining features.
    const size_t labelIndex = classLast ? numFeatures : 0;

    for (size_t i = 0; i < numSamples; ++i) {
        stringstream ss(lines[i]);
        string value;
        size_t pos = 0;
        size_t xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                // Trim so that e.g. "yes" and "yes\r" map to the same class.
                labels[i] = trim(value);
            } else {
                if (xIndex >= numFeatures) {
                    // Guard against writing past the end of X.
                    throw invalid_argument("Too many values in data row " + to_string(i + 1));
                }
                X[xIndex++][i] = stof(value);
            }
        }
    }
    y = factorize(labels);
}

/// Returns a copy of `source` without leading and trailing spaces, tabs,
/// carriage returns and newlines. An all-whitespace input yields "".
string ArffFiles::trim(const string& source)
{
    string s(source);
    s.erase(0, s.find_first_not_of(" \n\r\t"));
    // find_last_not_of returns npos on an empty string; npos + 1 wraps to 0,
    // which makes erase() a no-op on the already empty string.
    s.erase(s.find_last_not_of(" \n\r\t") + 1);
    return s;
}

/// Encodes string labels as consecutive integers starting at 0, assigned in
/// order of first appearance.
///
/// @param labels_t labels to encode.
/// @return one integer code per input label, in the same order.
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    vector<int> yy;
    yy.reserve(labels_t.size());
    map<string, int> labelMap;
    int nextCode = 0;
    for (const string& label : labels_t) {
        // emplace only inserts when the label is new; either way it gives
        // back the entry holding the label's code (single lookup).
        const auto inserted = labelMap.emplace(label, nextCode);
        if (inserted.second) {
            ++nextCode;
        }
        yy.push_back(inserted.first->second);
    }
    return yy;
}