Remove platformUtils and split Datasets & Dataset

2023-09-29 18:20:46 +02:00 · 2023-09-29 18:20:46 +02:00 · 66ec1b343b
commit 66ec1b343b
parent bb423da42f
18 changed files with 326 additions and 434 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -22,26 +22,24 @@
            "type": "lldb",
            "request": "launch",
            "name": "experiment",
-            "program": "${workspaceFolder}/build/src/Platform/main",
+            "program": "${workspaceFolder}/build/src/Platform/b_main",
            "args": [
                "-m",
-                "BoostAODE",
-                "-p",
-                "/Users/rmontanana/Code/discretizbench/datasets",
+                "TAN",
                "--stratified",
                "-d",
-                "mfeat-morphological",
+                "zoo",
                "--discretize"
                // "--hyperparameters",
                // "{\"repeatSparent\": true, \"maxModels\": 12}"
            ],
-            "cwd": "/Users/rmontanana/Code/discretizbench",
+            "cwd": "/Users/rmontanana/Code/odtebench",
        },
        {
            "type": "lldb",
            "request": "launch",
            "name": "best",
-            "program": "${workspaceFolder}/build/src/Platform/best",
+            "program": "${workspaceFolder}/build/src/Platform/b_best",
            "args": [
                "-m",
                "BoostAODE",
@ -55,7 +53,7 @@
            "type": "lldb",
            "request": "launch",
            "name": "manage",
-            "program": "${workspaceFolder}/build/src/Platform/manage",
+            "program": "${workspaceFolder}/build/src/Platform/b_manage",
            "args": [
                "-n",
                "20"
@ -66,7 +64,7 @@
            "type": "lldb",
            "request": "launch",
            "name": "list",
-            "program": "${workspaceFolder}/build/src/Platform/list",
+            "program": "${workspaceFolder}/build/src/Platform/b_list",
            "args": [],
            "cwd": "/Users/rmontanana/Code/discretizbench",
        },
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@ -5,9 +5,9 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
 include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
-add_executable(b_main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
-add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc ExcelFile.cc)
-add_executable(b_list list.cc platformUtils Datasets.cc)
+add_executable(b_main main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc)
+add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
+add_executable(b_list list.cc Datasets.cc Dataset.cc)
 add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc)
 target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
--- a/src/Platform/Dataset.cc
+++ b/src/Platform/Dataset.cc
@ -0,0 +1,225 @@
+#include "Dataset.h"
+#include "ArffFiles.h"
+#include <fstream>
+namespace platform {
+    Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), fileType(dataset.fileType), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv)
+    { // Member-wise copy. Initializers are written in the members' declaration order, which is the order they run in.
+    }
+    string Dataset::getName() const
+    { // Returns the stored dataset name; unlike most getters this does not require load() to have run.
+        return name;
+    }
+    string Dataset::getClassName() const
+    { // Returns the class column name without checking `loaded`; the load_* functions may overwrite the constructor value.
+        return className;
+    }
+    vector<string> Dataset::getFeatures() const
+    {
+        // Feature names are returned by value, and only once the dataset is loaded.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        return features;
+    }
+    int Dataset::getNFeatures() const
+    {
+        // n_features is set at the end of load(); refuse to answer before that.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        return n_features;
+    }
+    int Dataset::getNSamples() const
+    {
+        // n_samples is set at the end of load(); refuse to answer before that.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        return n_samples;
+    }
+    map<string, vector<int>> Dataset::getStates() const
+    {
+        // Returns a copy of the states map, which load() only fills when discretization is enabled.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        return states;
+    }
+    pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
+    {
+        // Hands out references to the raw feature vectors (Xv) and the labels (yv); nothing is copied.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        return { Xv, yv };
+    }
+    pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
+    {
+        // Hands out references to Xd and yv; load() only fills Xd when `discretize` is set.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        return { Xd, yv };
+    }
+    pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
+    {
+        // The tensors are rebuilt from the vectors on every call, then returned by reference.
+        if (!loaded) {
+            throw invalid_argument("Dataset not loaded.");
+        }
+        buildTensors();
+        return { X, y };
+    }
+    void Dataset::load_csv()
+    {
+        // Header row names the columns (the last one is the class); every following row is one sample.
+        ifstream file(path + "/" + name + ".csv");
+        if (!file.is_open()) {
+            throw invalid_argument("Unable to open dataset file.");
+        }
+        string line;
+        getline(file, line);
+        vector<string> tokens = split(line, ',');
+        features = vector<string>(tokens.begin(), tokens.end() - 1);
+        if (className == "-1") {
+            className = tokens.back();
+        }
+        for (auto col = 0; col < features.size(); ++col) {
+            Xv.push_back(vector<float>());
+        }
+        while (getline(file, line)) {
+            tokens = split(line, ',');
+            for (auto col = 0; col < features.size(); ++col) {
+                Xv[col].push_back(stof(tokens[col]));
+            }
+            yv.push_back(stoi(tokens.back()));
+        }
+        file.close();
+    }
+    void Dataset::computeStates()
+    { // Fills `states` with the ids 0..max for every discretized feature (from Xd) and for the class labels (from yv).
+        for (int i = 0; i < features.size(); ++i) {
+            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
+            auto& item = states.at(features[i]); // must be a reference: with a copy, iota would fill a temporary and the map entry would stay all zeros
+            iota(begin(item), end(item), 0);
+        }
+        states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
+        iota(begin(states.at(className)), end(states.at(className)), 0);
+    }
+    void Dataset::load_arff()
+    {
+        // ArffFiles parses <path>/<name>.arff; samples, labels, class name and attribute names are copied out.
+        auto arff = ArffFiles();
+        arff.load(path + "/" + name + ".arff", className);
+        Xv = arff.getX();
+        yv = arff.getY();
+        className = arff.getClassName();
+        for (const auto& attribute : arff.getAttributes()) {
+            features.push_back(attribute.first);
+        }
+    }
+    vector<string> tokenize(string line)
+    { // Splits `line` (a by-value copy) on spaces, tabs and newlines. Runs of separators are collapsed; a separator at position 0 yields an empty first token.
+        vector<string> tokens;
+        for (auto i = 0; i < line.size(); ++i) {
+            if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
+                string token = line.substr(0, i); // everything before the separator
+                tokens.push_back(token);
+                line.erase(line.begin(), line.begin() + i + 1); // drop the token together with its separator
+                i = 0;
+                while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') // swallow any separators that follow immediately
+                    line.erase(line.begin(), line.begin() + i + 1);
+            }
+        }
+        if (line.size() > 0) {
+            tokens.push_back(line); // whatever is left is the last token
+        }
+        return tokens;
+    }
+    void Dataset::load_rdata()
+    {
+        // Whitespace-separated file <path>/<name>_R.dat: a header row of column names, then one row per sample.
+        ifstream file(path + "/" + name + "_R.dat");
+        if (!file.is_open()) {
+            throw invalid_argument("Unable to open dataset file.");
+        }
+        string line;
+        getline(file, line);
+        line = ArffFiles::trim(line);
+        vector<string> tokens = tokenize(line);
+        transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
+        if (className == "-1") {
+            className = ArffFiles::trim(tokens.back());
+        }
+        for (auto col = 0; col < features.size(); ++col) {
+            Xv.push_back(vector<float>());
+        }
+        while (getline(file, line)) {
+            tokens = tokenize(line);
+            // Data rows carry the instance number as their first token, so feature `col` sits at token `col + 1`.
+            for (auto col = 0; col < features.size(); ++col) {
+                const float value = stof(tokens[col + 1]);
+                Xv[col].push_back(value);
+            }
+            yv.push_back(stoi(tokens.back()));
+        }
+        file.close();
+    }
+    void Dataset::load()
+    { // No-op when already loaded; otherwise reads the file for `fileType`, optionally discretizes, then records the dimensions.
+        if (loaded) {
+            return;
+        }
+        if (fileType == CSV) {
+            load_csv();
+        } else if (fileType == ARFF) {
+            load_arff();
+        } else if (fileType == RDATA) {
+            load_rdata();
+        }
+        if (discretize) {
+            Xd = discretizeDataset(Xv, yv);
+            computeStates();
+        }
+        n_samples = Xv.empty() ? 0 : static_cast<int>(Xv[0].size()); // Xv[0] on an empty Xv (no feature columns) would be undefined behavior
+        n_features = Xv.size();
+        loaded = true;
+    }
+    void Dataset::buildTensors()
+    {
+        // X is allocated as (n_features x n_samples) and filled one feature row at a time:
+        // Int32 rows taken from Xd when discretizing, Float32 rows taken from Xv otherwise.
+        const auto dtype = discretize ? torch::kInt32 : torch::kFloat32;
+        X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, dtype);
+        for (int row = 0; row < features.size(); ++row) {
+            if (discretize) {
+                X.index_put_({ row,  "..." }, torch::tensor(Xd[row], torch::kInt32));
+            } else {
+                X.index_put_({ row,  "..." }, torch::tensor(Xv[row], torch::kFloat32));
+            }
+        }
+        // The label tensor is always Int32 and is rebuilt from yv on every call.
+        y = torch::tensor(yv, torch::kInt32);
+    }
+    vector<mdlp::labels_t> Dataset::discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
+    {
+        // One CPPFImdlp instance is re-fitted on each entry of X against y; a copy of each transform result is kept.
+        vector<mdlp::labels_t> discretized;
+        auto fimdlp = mdlp::CPPFImdlp();
+        for (auto& column : X) {
+            fimdlp.fit(column, y);
+            discretized.push_back(fimdlp.transform(column));
+        }
+        return discretized;
+    }
+    vector<string> Dataset::split(const string& text, char delimiter)
+    {
+        // Tokens are whatever getline() yields for `delimiter`; empty text therefore produces an empty vector.
+        vector<string> pieces;
+        stringstream stream(text);
+        for (string piece; getline(stream, piece, delimiter);) {
+            pieces.push_back(piece);
+        }
+        return pieces;
+    }
+}
--- a/src/Platform/Dataset.h
+++ b/src/Platform/Dataset.h
@ -0,0 +1,80 @@
+#ifndef DATASET_H
+#define DATASET_H
+#include <torch/torch.h>
+#include <map>
+#include <vector>
+#include <string>
+#include "CPPFImdlp.h"
+namespace platform {
+    using namespace std;
+
+    enum fileType_t { CSV, ARFF, RDATA };
+    class SourceData { // Maps a data-source name ("Surcov", "Arff", "Tanveer") to its dataset folder and file format.
+    public:
+        SourceData(string source)
+        {
+            if (source == "Surcov") {
+                path = "datasets/";
+                fileType = CSV;
+            } else if (source == "Arff") {
+                path = "datasets/";
+                fileType = ARFF;
+            } else if (source == "Tanveer") {
+                path = "data/";
+                fileType = RDATA;
+            } else {
+                throw invalid_argument("Unknown source."); // any other name is rejected at construction
+            }
+        }
+        string getPath() const // const-qualified so it can be called on a const SourceData
+        {
+            return path;
+        }
+        fileType_t getFileType() const // const-qualified so it can be called on a const SourceData
+        {
+            return fileType;
+        }
+    private:
+        string path;
+        fileType_t fileType;
+    };
+    class Dataset { // One dataset file plus its in-memory forms: raw vectors, discretized vectors and tensors.
+    private:
+        string path;
+        string name;
+        fileType_t fileType;
+        string className;
+        int n_samples{ 0 }, n_features{ 0 };
+        vector<string> features;
+        map<string, vector<int>> states; // filled by computeStates() when discretizing
+        bool loaded;
+        bool discretize;
+        torch::Tensor X, y; // rebuilt by buildTensors() on each getTensors() call
+        vector<vector<float>> Xv; // raw values, one inner vector per feature
+        vector<vector<int>> Xd; // discretized values, one inner vector per feature
+        vector<int> yv; // class labels
+        void buildTensors();
+        void load_csv();
+        void load_arff();
+        void load_rdata();
+        void computeStates();
+        vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y);
+    public:
+        Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), fileType(fileType), className(className), loaded(false), discretize(discretize) {} // initializers in declaration order
+        explicit Dataset(const Dataset&);
+        static vector<string> split(const string& text, char delimiter);
+        string getName() const;
+        string getClassName() const;
+        vector<string> getFeatures() const;
+        map<string, vector<int>> getStates() const;
+        pair<vector<vector<float>>&, vector<int>&> getVectors();
+        pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
+        pair<torch::Tensor&, torch::Tensor&> getTensors();
+        int getNFeatures() const;
+        int getNSamples() const;
+        void load(); // does nothing if the dataset is already loaded
+        inline bool isLoaded() const { return loaded; } // top-level const on a by-value bool return had no effect
+    };
+};
+
+#endif
--- a/src/Platform/Datasets.cc
+++ b/src/Platform/Datasets.cc
@ -1,6 +1,4 @@
 #include "Datasets.h"
-#include "platformUtils.h"
-#include "ArffFiles.h"
 #include <fstream>
 namespace platform {
    void Datasets::load()
@ -15,7 +13,7 @@ namespace platform {
                if (line.empty() || line[0] == '#') {
                    continue;
                }
-                vector<string> tokens = split(line, ',');
+                vector<string> tokens = Dataset::split(line, ',');
                string name = tokens[0];
                string className;
                try {
@ -129,203 +127,4 @@ namespace platform {
    {
        return datasets.find(name) != datasets.end();
    }
-    Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
-    {
-    }
-    string Dataset::getName() const
-    {
-        return name;
-    }
-    string Dataset::getClassName() const
-    {
-        return className;
-    }
-    vector<string> Dataset::getFeatures() const
-    {
-        if (loaded) {
-            return features;
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    int Dataset::getNFeatures() const
-    {
-        if (loaded) {
-            return n_features;
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    int Dataset::getNSamples() const
-    {
-        if (loaded) {
-            return n_samples;
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    map<string, vector<int>> Dataset::getStates() const
-    {
-        if (loaded) {
-            return states;
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
-    {
-        if (loaded) {
-            return { Xv, yv };
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
-    {
-        if (loaded) {
-            return { Xd, yv };
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
-    {
-        if (loaded) {
-            buildTensors();
-            return { X, y };
-        } else {
-            throw invalid_argument("Dataset not loaded.");
-        }
-    }
-    void Dataset::load_csv()
-    {
-        ifstream file(path + "/" + name + ".csv");
-        if (file.is_open()) {
-            string line;
-            getline(file, line);
-            vector<string> tokens = split(line, ',');
-            features = vector<string>(tokens.begin(), tokens.end() - 1);
-            if (className == "-1") {
-                className = tokens.back();
-            }
-            for (auto i = 0; i < features.size(); ++i) {
-                Xv.push_back(vector<float>());
-            }
-            while (getline(file, line)) {
-                tokens = split(line, ',');
-                for (auto i = 0; i < features.size(); ++i) {
-                    Xv[i].push_back(stof(tokens[i]));
-                }
-                yv.push_back(stoi(tokens.back()));
-            }
-            file.close();
-        } else {
-            throw invalid_argument("Unable to open dataset file.");
-        }
-    }
-    void Dataset::computeStates()
-    {
-        for (int i = 0; i < features.size(); ++i) {
-            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
-            auto item = states.at(features[i]);
-            iota(begin(item), end(item), 0);
-        }
-        states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
-        iota(begin(states.at(className)), end(states.at(className)), 0);
-    }
-    void Dataset::load_arff()
-    {
-        auto arff = ArffFiles();
-        arff.load(path + "/" + name + ".arff", className);
-        // Get Dataset X, y
-        Xv = arff.getX();
-        yv = arff.getY();
-        // Get className & Features
-        className = arff.getClassName();
-        auto attributes = arff.getAttributes();
-        transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
-    }
-    vector<string> tokenize(string line)
-    {
-        vector<string> tokens;
-        for (auto i = 0; i < line.size(); ++i) {
-            if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
-                string token = line.substr(0, i);
-                tokens.push_back(token);
-                line.erase(line.begin(), line.begin() + i + 1);
-                i = 0;
-                while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n')
-                    line.erase(line.begin(), line.begin() + i + 1);
-            }
-        }
-        if (line.size() > 0) {
-            tokens.push_back(line);
-        }
-        return tokens;
-    }
-    void Dataset::load_rdata()
-    {
-        ifstream file(path + "/" + name + "_R.dat");
-        if (file.is_open()) {
-            string line;
-            getline(file, line);
-            line = ArffFiles::trim(line);
-            vector<string> tokens = tokenize(line);
-            transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
-            if (className == "-1") {
-                className = ArffFiles::trim(tokens.back());
-            }
-            for (auto i = 0; i < features.size(); ++i) {
-                Xv.push_back(vector<float>());
-            }
-            while (getline(file, line)) {
-                tokens = tokenize(line);
-                // We have to skip the first token, which is the instance number.
-                for (auto i = 1; i < features.size() + 1; ++i) {
-                    const float value = stof(tokens[i]);
-                    Xv[i - 1].push_back(value);
-                }
-                yv.push_back(stoi(tokens.back()));
-            }
-            file.close();
-        } else {
-            throw invalid_argument("Unable to open dataset file.");
-        }
-    }
-    void Dataset::load()
-    {
-        if (loaded) {
-            return;
-        }
-        if (fileType == CSV) {
-            load_csv();
-        } else if (fileType == ARFF) {
-            load_arff();
-        } else if (fileType == RDATA) {
-            load_rdata();
-        }
-        if (discretize) {
-            Xd = discretizeDataset(Xv, yv);
-            computeStates();
-        }
-        n_samples = Xv[0].size();
-        n_features = Xv.size();
-        loaded = true;
-    }
-    void Dataset::buildTensors()
-    {
-        if (discretize) {
-            X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
-        } else {
-            X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
-        }
-        for (int i = 0; i < features.size(); ++i) {
-            if (discretize) {
-                X.index_put_({ i,  "..." }, torch::tensor(Xd[i], torch::kInt32));
-            } else {
-                X.index_put_({ i,  "..." }, torch::tensor(Xv[i], torch::kFloat32));
-            }
-        }
-        y = torch::tensor(yv, torch::kInt32);
-    }
 }
--- a/src/Platform/Datasets.h
+++ b/src/Platform/Datasets.h
@ -1,76 +1,8 @@
 #ifndef DATASETS_H
 #define DATASETS_H
-#include <torch/torch.h>
-#include <map>
-#include <vector>
-#include <string>
+#include "Dataset.h"
 namespace platform {
    using namespace std;
-    enum fileType_t { CSV, ARFF, RDATA };
-    class SourceData {
-    public:
-        SourceData(string source)
-        {
-            if (source == "Surcov") {
-                path = "datasets/";
-                fileType = CSV;
-            } else if (source == "Arff") {
-                path = "datasets/";
-                fileType = ARFF;
-            } else if (source == "Tanveer") {
-                path = "data/";
-                fileType = RDATA;
-            } else {
-                throw invalid_argument("Unknown source.");
-            }
-        }
-        string getPath()
-        {
-            return path;
-        }
-        fileType_t getFileType()
-        {
-            return fileType;
-        }
-    private:
-        string path;
-        fileType_t fileType;
-    };
-    class Dataset {
-    private:
-        string path;
-        string name;
-        fileType_t fileType;
-        string className;
-        int n_samples{ 0 }, n_features{ 0 };
-        vector<string> features;
-        map<string, vector<int>> states;
-        bool loaded;
-        bool discretize;
-        torch::Tensor X, y;
-        vector<vector<float>> Xv;
-        vector<vector<int>> Xd;
-        vector<int> yv;
-        void buildTensors();
-        void load_csv();
-        void load_arff();
-        void load_rdata();
-        void computeStates();
-    public:
-        Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
-        explicit Dataset(const Dataset&);
-        string getName() const;
-        string getClassName() const;
-        vector<string> getFeatures() const;
-        map<string, vector<int>> getStates() const;
-        pair<vector<vector<float>>&, vector<int>&> getVectors();
-        pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
-        pair<torch::Tensor&, torch::Tensor&> getTensors();
-        int getNFeatures() const;
-        int getNSamples() const;
-        void load();
-        const bool inline isLoaded() const { return loaded; };
-    };
    class Datasets {
    private:
        string path;
--- a/src/Platform/DotEnv.h
+++ b/src/Platform/DotEnv.h
@ -4,7 +4,7 @@
 #include <map>
 #include <fstream>
 #include <sstream>
-#include "platformUtils.h"
+#include "Dataset.h"
 namespace platform {
    class DotEnv {
    private:
@ -51,7 +51,7 @@ namespace platform {
            auto seeds_str = env["seeds"];
            seeds_str = trim(seeds_str);
            seeds_str = seeds_str.substr(1, seeds_str.size() - 2);
-            auto seeds_str_split = split(seeds_str, ',');
+            auto seeds_str_split = Dataset::split(seeds_str, ',');
            transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
                return stoi(str);
                });
--- a/src/Platform/Experiment.cc
+++ b/src/Platform/Experiment.cc
@ -102,12 +102,12 @@ namespace platform {
        cout << data.dump(4) << endl;
    }

-    void Experiment::go(vector<string> filesToProcess, const string& path)
+    void Experiment::go(vector<string> filesToProcess)
    {
        cout << "*** Starting experiment: " << title << " ***" << endl;
        for (auto fileName : filesToProcess) {
            cout << "- " << setw(20) << left << fileName << " " << right << flush;
-            cross_validation(path, fileName);
+            cross_validation(fileName);
            cout << endl;
        }
    }
@ -132,7 +132,7 @@ namespace platform {
        cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;

    }
-    void Experiment::cross_validation(const string& path, const string& fileName)
+    void Experiment::cross_validation(const string& fileName)
    {
        auto env = platform::DotEnv();
        auto datasets = platform::Datasets(discretized, env.get("source_data"));
--- a/src/Platform/Experiment.h
+++ b/src/Platform/Experiment.h
@ -108,8 +108,8 @@ namespace platform {
        Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
        string get_file_name();
        void save(const string& path);
-        void cross_validation(const string& path, const string& fileName);
-        void go(vector<string> filesToProcess, const string& path);
+        void cross_validation(const string& fileName);
+        void go(vector<string> filesToProcess);
        void show();
        void report();
    };
--- a/src/Platform/Paths.h
+++ b/src/Platform/Paths.h
@ -4,7 +4,6 @@
 namespace platform {
    class Paths {
    public:
-        static std::string datasets() { return "datasets/"; }
        static std::string results() { return "results/"; }
        static std::string excel() { return "excel/"; }
    };
--- a/src/Platform/Results.cc
+++ b/src/Platform/Results.cc
@ -1,5 +1,4 @@
 #include <filesystem>
-#include "platformUtils.h"
 #include "Results.h"
 #include "ReportConsole.h"
 #include "ReportExcel.h"
--- a/src/Platform/main.cc
+++ b/src/Platform/main.cc
@ -1,7 +1,6 @@
 #include <iostream>
 #include <argparse/argparse.hpp>
 #include <nlohmann/json.hpp>
-#include "platformUtils.h"
 #include "Experiment.h"
 #include "Datasets.h"
 #include "DotEnv.h"
@ -19,9 +18,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
    argparse::ArgumentParser program("main");
    program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
    program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
-    program.add_argument("-p", "--path")
-        .help("folder where the data files are located, default")
-        .default_value(string{ platform::Paths::datasets() });
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->toString())
        .action([](const std::string& value) {
@ -55,13 +51,11 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
    try {
        program.parse_args(argc, argv);
        auto file_name = program.get<string>("dataset");
-        auto path = program.get<string>("path");
        auto model_name = program.get<string>("model");
        auto discretize_dataset = program.get<bool>("discretize");
        auto stratified = program.get<bool>("stratified");
        auto n_folds = program.get<int>("folds");
        auto seeds = program.get<vector<int>>("seeds");
-        auto complete_file_name = path + file_name + ".arff";
        auto title = program.get<string>("title");
        auto hyperparameters = program.get<string>("hyperparameters");
        auto saveResults = program.get<bool>("save");
@ -81,7 +75,6 @@ int main(int argc, char** argv)
 {
    auto program = manageArguments(argc, argv);
    auto file_name = program.get<string>("dataset");
-    auto path = program.get<string>("path");
    auto model_name = program.get<string>("model");
    auto discretize_dataset = program.get<bool>("discretize");
    auto stratified = program.get<bool>("stratified");
@ -120,7 +113,7 @@ int main(int argc, char** argv)
    }
    platform::Timer timer;
    timer.start();
-    experiment.go(filesToTest, path);
+    experiment.go(filesToTest);
    experiment.setDuration(timer.getDuration());
    if (saveResults) {
        experiment.save(platform::Paths::results());
--- a/src/Platform/manage.cc
+++ b/src/Platform/manage.cc
@ -1,6 +1,5 @@
 #include <iostream>
 #include <argparse/argparse.hpp>
-#include "platformUtils.h"
 #include "Paths.h"
 #include "Results.h"

--- a/src/Platform/platformUtils.cc
+++ b/src/Platform/platformUtils.cc
@ -1,110 +0,0 @@
-#include "platformUtils.h"
-#include "Paths.h"
-
-using namespace torch;
-
-vector<string> split(const string& text, char delimiter)
-{
-    vector<string> result;
-    stringstream ss(text);
-    string token;
-    while (getline(ss, token, delimiter)) {
-        result.push_back(token);
-    }
-    return result;
-}
-
-pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
-{
-    vector<mdlp::labels_t> Xd;
-    map<string, int> maxes;
-    auto fimdlp = mdlp::CPPFImdlp();
-    for (int i = 0; i < X.size(); i++) {
-        fimdlp.fit(X[i], y);
-        mdlp::labels_t& xd = fimdlp.transform(X[i]);
-        maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
-        Xd.push_back(xd);
-    }
-    return { Xd, maxes };
-}
-
-vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
-{
-    vector<mdlp::labels_t> Xd;
-    auto fimdlp = mdlp::CPPFImdlp();
-    for (int i = 0; i < X.size(); i++) {
-        fimdlp.fit(X[i], y);
-        mdlp::labels_t& xd = fimdlp.transform(X[i]);
-        Xd.push_back(xd);
-    }
-    return Xd;
-}
-
-bool file_exists(const string& name)
-{
-    if (FILE* file = fopen(name.c_str(), "r")) {
-        fclose(file);
-        return true;
-    } else {
-        return false;
-    }
-}
-
-tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset)
-{
-    auto handler = ArffFiles();
-    handler.load(path + static_cast<string>(name) + ".arff", class_last);
-    // Get Dataset X, y
-    vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
-    vector<string> features;
-    auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    Tensor Xd;
-    auto states = map<string, vector<int>>();
-    if (discretize_dataset) {
-        auto Xr = discretizeDataset(X, y);
-        Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
-        for (int i = 0; i < features.size(); ++i) {
-            states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
-            auto item = states.at(features[i]);
-            iota(begin(item), end(item), 0);
-            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
-        }
-        states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
-        iota(begin(states.at(className)), end(states.at(className)), 0);
-    } else {
-        Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
-        for (int i = 0; i < features.size(); ++i) {
-            Xd.index_put_({ "...", i }, torch::tensor(X[i]));
-        }
-    }
-    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
-}
-
-tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
-{
-    auto handler = ArffFiles();
-    handler.load(platform::Paths::datasets() + static_cast<string>(name) + ".arff");
-    // Get Dataset X, y
-    vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
-    vector<string> features;
-    auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    // Discretize Dataset
-    vector<mdlp::labels_t> Xd;
-    map<string, int> maxes;
-    tie(Xd, maxes) = discretize(X, y, features);
-    maxes[className] = *max_element(y.begin(), y.end()) + 1;
-    map<string, vector<int>> states;
-    for (auto feature : features) {
-        states[feature] = vector<int>(maxes[feature]);
-    }
-    states[className] = vector<int>(maxes[className]);
-    return { Xd, y, features, className, states };
-}
--- a/src/Platform/platformUtils.h
+++ b/src/Platform/platformUtils.h
@ -1,20 +0,0 @@
-#ifndef PLATFORM_UTILS_H
-#define PLATFORM_UTILS_H
-#include <torch/torch.h>
-#include <string>
-#include <vector>
-#include <map>
-#include <tuple>
-#include "ArffFiles.h"
-#include "CPPFImdlp.h"
-using namespace std;
-
-bool file_exists(const std::string& name);
-vector<string> split(const string& text, char delimiter);
-pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
-vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y);
-pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, const string& className);
-tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name);
-tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset);
-map<string, vector<int>> get_states(vector<string>& features, string className, map<string, int>& maxes);
-#endif //PLATFORM_UTILS_H
--- a/tests/BayesModels.cc
+++ b/tests/BayesModels.cc
@ -9,7 +9,6 @@
 #include "TAN.h"
 #include "SPODE.h"
 #include "AODE.h"
-#include "platformUtils.h"

 TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
 {
--- a/tests/BayesNetwork.cc
+++ b/tests/BayesNetwork.cc
@ -3,7 +3,6 @@
 #include <catch2/generators/catch_generators.hpp>
 #include <string>
 #include "KDB.h"
-#include "platformUtils.h"

 TEST_CASE("Test Bayesian Network")
 {
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -5,7 +5,7 @@ if(ENABLE_TESTING)
    include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
    include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
    include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
-    set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc ${BayesNet_SOURCES})
+    set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCES})
    add_executable(${TEST_MAIN} ${TEST_SOURCES})
    target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
    add_test(NAME ${TEST_MAIN} COMMAND ${TEST_MAIN})