Add csv and R_dat files to platform
parent db17c14042
commit bb423da42f
@@ -5,13 +5,25 @@
 namespace platform {
     void Datasets::load()
     {
+        auto sd = SourceData(sfileType);
+        fileType = sd.getFileType();
+        path = sd.getPath();
         ifstream catalog(path + "all.txt");
         if (catalog.is_open()) {
             string line;
             while (getline(catalog, line)) {
+                if (line.empty() || line[0] == '#') {
+                    continue;
+                }
                 vector<string> tokens = split(line, ',');
                 string name = tokens[0];
-                string className = tokens[1];
+                string className;
+                try {
+                    className = tokens[1];
+                }
+                catch (exception e) {
+                    className = "-1";
+                }
                 datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
             }
             catalog.close();
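A review note on the try/catch above: `vector::operator[]` does not throw on an out-of-range index (that is undefined behavior), so as written the `catch` never fires for a catalog line with no class-name column; `tokens.at(1)` would throw `std::out_of_range`, and catching by `const exception&` avoids slicing. A minimal sketch of an explicit guard with the same `-1` sentinel:

    // Sketch only: guard the column instead of relying on exceptions.
    string className = tokens.size() > 1 ? tokens[1] : "-1";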
@@ -193,7 +205,9 @@ namespace platform {
         getline(file, line);
         vector<string> tokens = split(line, ',');
         features = vector<string>(tokens.begin(), tokens.end() - 1);
-        className = tokens.back();
+        if (className == "-1") {
+            className = tokens.back();
+        }
         for (auto i = 0; i < features.size(); ++i) {
             Xv.push_back(vector<float>());
         }
@@ -231,6 +245,53 @@ namespace platform {
         auto attributes = arff.getAttributes();
         transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
     }
+    vector<string> tokenize(string line)
+    {
+        vector<string> tokens;
+        for (auto i = 0; i < line.size(); ++i) {
+            if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
+                string token = line.substr(0, i);
+                tokens.push_back(token);
+                line.erase(line.begin(), line.begin() + i + 1);
+                i = 0;
+                while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n')
+                    line.erase(line.begin(), line.begin() + i + 1);
+            }
+        }
+        if (line.size() > 0) {
+            tokens.push_back(line);
+        }
+        return tokens;
+    }
+    void Dataset::load_rdata()
+    {
+        ifstream file(path + "/" + name + "_R.dat");
+        if (file.is_open()) {
+            string line;
+            getline(file, line);
+            line = ArffFiles::trim(line);
+            vector<string> tokens = tokenize(line);
+            transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
+            if (className == "-1") {
+                className = ArffFiles::trim(tokens.back());
+            }
+            for (auto i = 0; i < features.size(); ++i) {
+                Xv.push_back(vector<float>());
+            }
+            while (getline(file, line)) {
+                tokens = tokenize(line);
+                // We have to skip the first token, which is the instance number.
+                for (auto i = 1; i < features.size() + 1; ++i) {
+                    const float value = stof(tokens[i]);
+                    Xv[i - 1].push_back(value);
+                }
+                yv.push_back(stoi(tokens.back()));
+            }
+            file.close();
+        } else {
+            throw invalid_argument("Unable to open dataset file.");
+        }
+    }
     void Dataset::load()
     {
         if (loaded) {
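The new `tokenize` splits a line on runs of spaces, tabs, and newlines by repeatedly erasing the consumed prefix. An equivalent and simpler sketch using a string stream (assumes `#include <sstream>` is available; `operator>>` already skips any run of whitespace):

    vector<string> tokenize(const string& line)
    {
        vector<string> tokens;
        istringstream stream(line);
        string token;
        while (stream >> token) {  // >> discards leading whitespace before each token
            tokens.push_back(token);
        }
        return tokens;
    }

As `load_rdata` reads it, the `_R.dat` layout is: a header line with the feature names followed by the class name, then one row per instance that begins with an instance number, continues with the feature values, and ends with the class label.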
@@ -240,6 +301,8 @@ namespace platform {
             load_csv();
         } else if (fileType == ARFF) {
             load_arff();
+        } else if (fileType == RDATA) {
+            load_rdata();
         }
         if (discretize) {
             Xd = discretizeDataset(Xv, yv);
@@ -6,7 +6,36 @@
 #include <string>
 namespace platform {
     using namespace std;
-    enum fileType_t { CSV, ARFF };
+    enum fileType_t { CSV, ARFF, RDATA };
+    class SourceData {
+    public:
+        SourceData(string source)
+        {
+            if (source == "Surcov") {
+                path = "datasets/";
+                fileType = CSV;
+            } else if (source == "Arff") {
+                path = "datasets/";
+                fileType = ARFF;
+            } else if (source == "Tanveer") {
+                path = "data/";
+                fileType = RDATA;
+            } else {
+                throw invalid_argument("Unknown source.");
+            }
+        }
+        string getPath()
+        {
+            return path;
+        }
+        fileType_t getFileType()
+        {
+            return fileType;
+        }
+    private:
+        string path;
+        fileType_t fileType;
+    };
     class Dataset {
     private:
         string path;
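`SourceData` centralizes the mapping from a source name to a dataset directory and file type (the two getters could also be marked `const`). A minimal usage sketch:

    // Sketch: "Tanveer" selects the R_dat datasets under data/.
    auto sd = platform::SourceData("Tanveer");
    auto path = sd.getPath();      // "data/"
    auto type = sd.getFileType();  // RDATA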
@@ -25,6 +54,7 @@ namespace platform {
         void buildTensors();
         void load_csv();
         void load_arff();
+        void load_rdata();
         void computeStates();
     public:
         Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
@@ -45,11 +75,12 @@ namespace platform {
     private:
         string path;
        fileType_t fileType;
+        string sfileType;
         map<string, unique_ptr<Dataset>> datasets;
         bool discretize;
         void load(); // Loads the list of datasets
     public:
-        explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
+        explicit Datasets(bool discretize, string sfileType) : discretize(discretize), sfileType(sfileType) { load(); };
         vector<string> getNames();
         vector<string> getFeatures(const string& name) const;
         int getNSamples(const string& name) const;
@@ -1,8 +1,9 @@
+#include <fstream>
 #include "Experiment.h"
 #include "Datasets.h"
 #include "Models.h"
 #include "ReportConsole.h"
-#include <fstream>
+#include "DotEnv.h"
 namespace platform {
     using json = nlohmann::json;
     string get_date()
@@ -133,7 +134,8 @@ namespace platform {
     }
     void Experiment::cross_validation(const string& path, const string& fileName)
     {
-        auto datasets = platform::Datasets(path, discretized, platform::ARFF);
+        auto env = platform::DotEnv();
+        auto datasets = platform::Datasets(discretized, env.get("source_data"));
         // Get dataset
         auto [X, y] = datasets.getTensors(fileName);
         auto states = datasets.getStates(fileName);
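`DotEnv` presumably reads key/value pairs from a local `.env` file; the keys this commit relies on are `source_data` (here and in the report/main call sites below) and `platform` (in main). A hedged sketch of the expected entries, with illustrative values:

    # .env — keys taken from the env.get() calls in this commit; values illustrative
    source_data=Tanveer
    platform=local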
@@ -3,7 +3,7 @@
 #include "Datasets.h"
 #include "ReportBase.h"
 #include "BestScore.h"
+#include "DotEnv.h"
 
 namespace platform {
     ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
@@ -58,7 +58,8 @@ namespace platform {
             }
         } else {
             if (data["score_name"].get<string>() == "accuracy") {
-                auto dt = Datasets(Paths::datasets(), false);
+                auto env = platform::DotEnv();
+                auto dt = Datasets(false, env.get("source_data"));
                 dt.loadDataset(dataset);
                 auto numClasses = dt.getNClasses(dataset);
                 if (numClasses == 2) {
@@ -3,6 +3,7 @@
 #include "Paths.h"
 #include "Colors.h"
 #include "Datasets.h"
+#include "DotEnv.h"
 
 using namespace std;
 const int BALANCE_LENGTH = 75;
@@ -27,7 +28,8 @@ void outputBalance(const string& balance)
 
 int main(int argc, char** argv)
 {
-    auto data = platform::Datasets(platform::Paths().datasets(), false);
+    auto env = platform::DotEnv();
+    auto data = platform::Datasets(false, env.get("source_data"));
     locale mylocale(cout.getloc(), new separated);
     locale::global(mylocale);
     cout.imbue(mylocale);
@@ -89,7 +89,8 @@ int main(int argc, char** argv)
     auto seeds = program.get<vector<int>>("seeds");
     auto hyperparameters = program.get<string>("hyperparameters");
     vector<string> filesToTest;
-    auto datasets = platform::Datasets(path, true, platform::ARFF);
+    auto env = platform::DotEnv();
+    auto datasets = platform::Datasets(discretize_dataset, env.get("source_data"));
    auto title = program.get<string>("title");
     auto saveResults = program.get<bool>("save");
     if (file_name != "") {
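Beyond wiring in `DotEnv`, this hunk stops hard-coding discretization to `true`: the new call passes `discretize_dataset`, which presumably comes from the argument parser earlier in `main` (the flag name below is hypothetical):

    // Sketch: e.g. auto discretize_dataset = program.get<bool>("discretize");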
@@ -108,7 +109,7 @@ int main(int argc, char** argv)
     /*
     * Begin Processing
     */
-    auto env = platform::DotEnv();
+
     auto experiment = platform::Experiment();
     experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
     experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));