Remove old Files library

2024-05-26 17:25:36 +02:00
parent df82f82e88
commit e3a06264a9
8 changed files with 129 additions and 341 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,7 +88,6 @@ message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
 ## Configure test data path
 cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
 configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
 add_subdirectory(lib/Files)
 add_subdirectory(config)
 add_subdirectory(src)
 add_subdirectory(sample)
--- a/lib/Files/ArffFiles.cc
+++ b/lib/Files/ArffFiles.cc
@@ -1,176 +0,0 @@
 #include "ArffFiles.h"
 #include <fstream>
 #include <sstream>
 #include <map>
 #include <cctype> // std::isdigit
 #include <algorithm> // std::all_of
 #include <iostream>
 // Default construction: every member starts out empty.
 ArffFiles::ArffFiles() = default;
 // Returns a copy of the raw data lines collected by loadCommon().
 std::vector<std::string> ArffFiles::getLines() const
 {
    return lines;
 }
 // Returns the number of raw data lines read from the file.
 // Note: generateDataset() drops rows from X and y but not from `lines`,
 // so this can be larger than the number of samples held in X / y.
 unsigned long int ArffFiles::getSize() const
 {
    return lines.size();
 }
 // Returns a copy of the (name, type) attribute pairs. After load() the
 // class attribute has already been removed from this list.
 std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
 {
    return attributes;
 }
 // Returns the name of the attribute selected as class by load().
 std::string ArffFiles::getClassName() const
 {
    return className;
 }
 // Returns the declared type text of the attribute selected as class by load().
 std::string ArffFiles::getClassType() const
 {
    return classType;
 }
 // Returns a mutable reference to the feature data, stored as one vector per
 // attribute (X[attribute][row]).
 std::vector<std::vector<float>>& ArffFiles::getX()
 {
    return X;
 }
 // Returns a mutable reference to the integer-encoded class labels produced
 // by factorize().
 std::vector<int>& ArffFiles::getY()
 {
    return y;
 }
 void ArffFiles::loadCommon(std::string fileName)
 {
    std::ifstream file(fileName);
    if (!file.is_open()) {
        throw std::invalid_argument("Unable to open file");
    }
    std::string line;
    std::string keyword;
    std::string attribute;
    std::string type;
    std::string type_w;
    while (getline(file, line)) {
        if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
            continue;
        }
        if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
            std::stringstream ss(line);
            ss >> keyword >> attribute;
            type = "";
            while (ss >> type_w)
                type += type_w + " ";
            attributes.emplace_back(trim(attribute), trim(type));
            continue;
        }
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty())
        throw std::invalid_argument("No attributes found");
 }
 // Loads `fileName` and uses either the last attribute (classLast == true) or
 // the first one as the class. The chosen attribute is removed from
 // `attributes`, its name and type are kept in className / classType, and the
 // dataset is then built with the matching label column index.
 void ArffFiles::load(const std::string& fileName, bool classLast)
 {
    loadCommon(fileName);
    // loadCommon() throws when no attribute was found, so the list is not empty here.
    const auto classPos = classLast ? attributes.end() - 1 : attributes.begin();
    className = classPos->first;
    classType = classPos->second;
    attributes.erase(classPos);
    // With the class removed, "last" corresponds to the number of remaining attributes.
    const int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
    generateDataset(labelIndex);
 }
 // Loads `fileName` and uses the first attribute whose name equals `name` as
 // the class. That attribute is removed from `attributes`, its name and type
 // are kept in className / classType, and its original position is used as
 // the label column when building the dataset.
 //
 // Throws std::invalid_argument("Class name not found") when no attribute
 // matches.
 void ArffFiles::load(const std::string& fileName, const std::string& name)
 {
    loadCommon(fileName);
    const auto match = std::find_if(attributes.begin(), attributes.end(),
        [&name](const std::pair<std::string, std::string>& candidate) { return candidate.first == name; });
    if (match == attributes.end()) {
        throw std::invalid_argument("Class name not found");
    }
    // Take the column position before the erase invalidates the iterator.
    const int labelIndex = static_cast<int>(match - attributes.begin());
    className = match->first;
    classType = match->second;
    attributes.erase(match);
    generateDataset(labelIndex);
 }
 // Builds X and y from the raw data lines.
 //
 // Each line is split on ','. The field at position `labelIndex` is the class
 // label; every other field is converted with stof() and stored in
 // X[feature][row]. Rows containing at least one "?" (missing value) are
 // removed from both X and the labels before the labels are integer-encoded
 // with factorize().
 //
 // Throws std::invalid_argument when a line holds more feature values than
 // there are attributes (in addition to whatever stof() throws for text that
 // is not a number).
 void ArffFiles::generateDataset(int labelIndex)
 {
    X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
    auto yy = std::vector<std::string>(lines.size(), "");
    // Rows with missing values: ascending order, each row listed once.
    auto removeLines = std::vector<size_t>();
    for (size_t i = 0; i < lines.size(); i++) {
        std::stringstream ss(lines[i]);
        std::string value;
        int pos = 0;
        size_t xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
                continue;
            }
            if (xIndex >= X.size()) {
                // Writing past the last feature column would be out of bounds.
                throw std::invalid_argument("Too many values in data line");
            }
            if (value == "?") {
                X[xIndex++][i] = -1;
                // A row may hold several "?"; queue it for removal only once.
                if (removeLines.empty() || removeLines.back() != i)
                    removeLines.push_back(i);
            } else {
                X[xIndex++][i] = stof(value);
            }
        }
    }
    // Erase from the highest row index down so that the indices still to be
    // processed are not shifted by earlier removals.
    for (auto row = removeLines.rbegin(); row != removeLines.rend(); ++row) {
        yy.erase(yy.begin() + *row);
        for (auto& x : X) {
            x.erase(x.begin() + *row);
        }
    }
    y = factorize(yy);
 }
 // Returns a copy of `source` without leading and trailing blanks, single
 // quotes, newlines, carriage returns and tabs. A string made only of those
 // characters yields an empty string.
 std::string ArffFiles::trim(const std::string& source)
 {
    const char* const strip = " '\n\r\t";
    const auto first = source.find_first_not_of(strip);
    if (first == std::string::npos) {
        return std::string();
    }
    const auto last = source.find_last_not_of(strip);
    return source.substr(first, last - first + 1);
 }
 // Encodes the textual labels in `labels_t` as consecutive integers, assigned
 // in order of first appearance, and returns one code per input label.
 //
 // As a side effect `labels` is rebuilt so that labels[code] is the display
 // name of that code: the label itself, or "Class <label>" when the label
 // consists only of decimal digits.
 std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
 {
    std::vector<int> yy;
    labels.clear();
    yy.reserve(labels_t.size());
    std::map<std::string, int> labelMap;
    for (const std::string& label : labels_t) {
        // The next free code equals the number of distinct labels seen so far;
        // emplace leaves the map untouched when the label is already known.
        const auto entry = labelMap.emplace(label, static_cast<int>(labelMap.size()));
        if (entry.second) {
            // std::isdigit requires a value representable as unsigned char;
            // passing a negative plain char is undefined behaviour.
            const bool allDigits = std::all_of(label.begin(), label.end(),
                [](unsigned char c) { return std::isdigit(c) != 0; });
            if (allDigits)
                labels.push_back("Class " + label);
            else
                labels.push_back(label);
        }
        yy.push_back(entry.first->second);
    }
    return yy;
 }
--- a/lib/Files/ArffFiles.h
+++ b/lib/Files/ArffFiles.h
@@ -1,34 +0,0 @@
 #ifndef ARFFFILES_H
 #define ARFFFILES_H
 #include <string>
 #include <vector>
 // Loader for ARFF data files: reads the attribute declarations and data
 // lines, separates one attribute as the class, and exposes the features as
 // floats and the class as integer codes.
 class ArffFiles {
 public:
    ArffFiles();

    // Loading -------------------------------------------------------------
    // Load `fileName` using the last (classLast == true) or first attribute as class.
    void load(const std::string& fileName, bool classLast = true);
    // Load `fileName` using the attribute called `name` as class.
    void load(const std::string& fileName, const std::string& name);

    // Data access ---------------------------------------------------------
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();
    std::vector<std::string> getLines() const;
    unsigned long int getSize() const;

    // Metadata ------------------------------------------------------------
    std::vector<std::pair<std::string, std::string>> getAttributes() const;
    std::string getClassName() const;
    std::string getClassType() const;
    // Display names of the class codes, as filled in by factorize().
    std::vector<std::string> getLabels() const { return labels; }

    // Helpers -------------------------------------------------------------
    static std::string trim(const std::string& source);
    std::vector<int> factorize(const std::vector<std::string>& labels_t);

 private:
    void loadCommon(std::string fileName);
    void generateDataset(int labelIndex);

    std::vector<std::string> lines;                              // raw data lines
    std::vector<std::pair<std::string, std::string>> attributes; // (name, type) pairs
    std::string className;
    std::string classType;
    std::vector<std::vector<float>> X;                           // one vector per attribute
    std::vector<int> y;                                          // encoded class labels
    std::vector<std::string> labels;                             // display name per class code
 };
 #endif
--- a/lib/Files/CMakeLists.txt
+++ b/lib/Files/CMakeLists.txt
@@ -1 +0,0 @@
 add_library(ArffFiles ArffFiles.cc)
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -12,4 +12,4 @@ include_directories(
    ${Bayesnet_INCLUDE_DIRS}
 )
 add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp) 
-target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
--- a/sample/sample.cpp
+++ b/sample/sample.cpp
@@ -5,7 +5,7 @@
 #include <torch/torch.h>
 #include <argparse/argparse.hpp>
 #include <nlohmann/json.hpp>
-#include <ArffFiles.h>
+#include <ArffFiles.hpp>
 #include <CPPFImdlp.h>
 #include <folding.hpp>
 #include <bayesnet/utils/BayesMetrics.h>
@@ -79,11 +79,11 @@ int main(int argc, char** argv)
        }
        throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
            }
-    );
+        );
    program.add_argument("-p", "--path")
        .help(" folder where the data files are located, default")
        .default_value(std::string{ PATH }
-    );
+        );
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->toString())
        .action([](const std::string& value) {
@@ -93,7 +93,7 @@ int main(int argc, char** argv)
        }
        throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
            }
-    );
+        );
    program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
    program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
@@ -112,129 +112,129 @@ int main(int argc, char** argv)
        catch (...) {
            throw runtime_error("Number of folds must be an integer");
        }});
-    program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
+        program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
-    bool class_last, stratified, tensors, dump_cpt;
+        bool class_last, stratified, tensors, dump_cpt;
-    std::string model_name, file_name, path, complete_file_name;
+        std::string model_name, file_name, path, complete_file_name;
-    int nFolds, seed;
+        int nFolds, seed;
-    try {
+        try {
-        program.parse_args(argc, argv);
+            program.parse_args(argc, argv);
-        file_name = program.get<std::string>("dataset");
+            file_name = program.get<std::string>("dataset");
-        path = program.get<std::string>("path");
+            path = program.get<std::string>("path");
-        model_name = program.get<std::string>("model");
+            model_name = program.get<std::string>("model");
-        complete_file_name = path + file_name + ".arff";
+            complete_file_name = path + file_name + ".arff";
-        stratified = program.get<bool>("stratified");
+            stratified = program.get<bool>("stratified");
-        tensors = program.get<bool>("tensors");
+            tensors = program.get<bool>("tensors");
-        nFolds = program.get<int>("folds");
+            nFolds = program.get<int>("folds");
-        seed = program.get<int>("seed");
+            seed = program.get<int>("seed");
-        dump_cpt = program.get<bool>("dumpcpt");
+            dump_cpt = program.get<bool>("dumpcpt");
-        class_last = datasets[file_name];
+            class_last = datasets[file_name];
-        if (!file_exists(complete_file_name)) {
+            if (!file_exists(complete_file_name)) {
-            throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
+                throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
            }
        }
        catch (const exception& err) {
            cerr << err.what() << std::endl;
            cerr << program;
            exit(1);
        }
    }
    catch (const exception& err) {
        cerr << err.what() << std::endl;
        cerr << program;
        exit(1);
    }
-    /*
+        /*
-    * Begin Processing
+        * Begin Processing
-    */
+        */
-    auto handler = ArffFiles();
+        auto handler = ArffFiles();
-    handler.load(complete_file_name, class_last);
+        handler.load(complete_file_name, class_last);
-    // Get Dataset X, y
+        // Get Dataset X, y
-    std::vector<mdlp::samples_t>& X = handler.getX();
+        std::vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
+        mdlp::labels_t& y = handler.getY();
-    // Get className & Features
+        // Get className & Features
-    auto className = handler.getClassName();
+        auto className = handler.getClassName();
-    std::vector<std::string> features;
+        std::vector<std::string> features;
-    auto attributes = handler.getAttributes();
+        auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features),
+        transform(attributes.begin(), attributes.end(), back_inserter(features),
-        [](const pair<std::string, std::string>& item) { return item.first; });
+            [](const pair<std::string, std::string>& item) { return item.first; });
-    // Discretize Dataset
+        // Discretize Dataset
-    auto [Xd, maxes] = discretize(X, y, features);
+        auto [Xd, maxes] = discretize(X, y, features);
-    maxes[className] = *max_element(y.begin(), y.end()) + 1;
+        maxes[className] = *max_element(y.begin(), y.end()) + 1;
-    map<std::string, std::vector<int>> states;
+        map<std::string, std::vector<int>> states;
-    for (auto feature : features) {
+        for (auto feature : features) {
-        states[feature] = std::vector<int>(maxes[feature]);
+            states[feature] = std::vector<int>(maxes[feature]);
    }
    states[className] = std::vector<int>(maxes[className]);
    auto clf = platform::Models::instance()->create(model_name);
    clf->fit(Xd, y, features, className, states);
    if (dump_cpt) {
        std::cout << "--- CPT Tables ---" << std::endl;
        clf->dump_cpt();
    }
    auto lines = clf->show();
    for (auto line : lines) {
        std::cout << line << std::endl;
    }
    std::cout << "--- Topological Order ---" << std::endl;
    auto order = clf->topological_order();
    for (auto name : order) {
        std::cout << name << ", ";
    }
    std::cout << "end." << std::endl;
    auto score = clf->score(Xd, y);
    std::cout << "Score: " << score << std::endl;
    auto graph = clf->graph();
    auto dot_file = model_name + "_" + file_name;
    ofstream file(dot_file + ".dot");
    file << graph;
    file.close();
    std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
    std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
    std::string stratified_string = stratified ? " Stratified" : "";
    std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
    std::cout << "==========================================" << std::endl;
    torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
    torch::Tensor yt = torch::tensor(y, torch::kInt32);
    for (int i = 0; i < features.size(); ++i) {
        Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
    }
    float total_score = 0, total_score_train = 0, score_train, score_test;
    folding::Fold* fold;
    double nodes = 0.0;
    if (stratified)
        fold = new folding::StratifiedKFold(nFolds, y, seed);
    else
        fold = new folding::KFold(nFolds, y.size(), seed);
    for (auto i = 0; i < nFolds; ++i) {
        auto [train, test] = fold->getFold(i);
        std::cout << "Fold: " << i + 1 << std::endl;
        if (tensors) {
            auto ttrain = torch::tensor(train, torch::kInt64);
            auto ttest = torch::tensor(test, torch::kInt64);
            torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
            torch::Tensor ytraint = yt.index({ ttrain });
            torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
            torch::Tensor ytestt = yt.index({ ttest });
            clf->fit(Xtraint, ytraint, features, className, states);
            auto temp = clf->predict(Xtraint);
            score_train = clf->score(Xtraint, ytraint);
            score_test = clf->score(Xtestt, ytestt);
        } else {
            auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
            auto [Xtest, ytest] = extract_indices(test, Xd, y);
            clf->fit(Xtrain, ytrain, features, className, states);
            std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
            nodes += clf->getNumberOfNodes();
            score_train = clf->score(Xtrain, ytrain);
            score_test = clf->score(Xtest, ytest);
        }
        states[className] = std::vector<int>(maxes[className]);
        auto clf = platform::Models::instance()->create(model_name);
        clf->fit(Xd, y, features, className, states);
        if (dump_cpt) {
            std::cout << "--- CPT Tables ---" << std::endl;
            clf->dump_cpt();
        }
-        total_score_train += score_train;
+        auto lines = clf->show();
-        total_score += score_test;
+        for (auto line : lines) {
-        std::cout << "Score Train: " << score_train << std::endl;
+            std::cout << line << std::endl;
-        std::cout << "Score Test : " << score_test << std::endl;
+        }
-        std::cout << "-------------------------------------------------------------------------------" << std::endl;
+        std::cout << "--- Topological Order ---" << std::endl;
-    }
+        auto order = clf->topological_order();
-    std::cout << "Nodes: " << nodes / nFolds << std::endl;
+        for (auto name : order) {
-    std::cout << "**********************************************************************************" << std::endl;
+            std::cout << name << ", ";
-    std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
+        }
-    std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
+        std::cout << "end." << std::endl;
        auto score = clf->score(Xd, y);
        std::cout << "Score: " << score << std::endl;
        auto graph = clf->graph();
        auto dot_file = model_name + "_" + file_name;
        ofstream file(dot_file + ".dot");
        file << graph;
        file.close();
        std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
        std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
        std::string stratified_string = stratified ? " Stratified" : "";
        std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
        std::cout << "==========================================" << std::endl;
        torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
        torch::Tensor yt = torch::tensor(y, torch::kInt32);
        for (int i = 0; i < features.size(); ++i) {
            Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
        }
        float total_score = 0, total_score_train = 0, score_train, score_test;
        folding::Fold* fold;
        double nodes = 0.0;
        if (stratified)
            fold = new folding::StratifiedKFold(nFolds, y, seed);
        else
            fold = new folding::KFold(nFolds, y.size(), seed);
        for (auto i = 0; i < nFolds; ++i) {
            auto [train, test] = fold->getFold(i);
            std::cout << "Fold: " << i + 1 << std::endl;
            if (tensors) {
                auto ttrain = torch::tensor(train, torch::kInt64);
                auto ttest = torch::tensor(test, torch::kInt64);
                torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
                torch::Tensor ytraint = yt.index({ ttrain });
                torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
                torch::Tensor ytestt = yt.index({ ttest });
                clf->fit(Xtraint, ytraint, features, className, states);
                auto temp = clf->predict(Xtraint);
                score_train = clf->score(Xtraint, ytraint);
                score_test = clf->score(Xtestt, ytestt);
            } else {
                auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
                auto [Xtest, ytest] = extract_indices(test, Xd, y);
                clf->fit(Xtrain, ytrain, features, className, states);
                std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
                nodes += clf->getNumberOfNodes();
                score_train = clf->score(Xtrain, ytrain);
                score_test = clf->score(Xtest, ytest);
            }
            if (dump_cpt) {
                std::cout << "--- CPT Tables ---" << std::endl;
                clf->dump_cpt();
            }
            total_score_train += score_train;
            total_score += score_test;
            std::cout << "Score Train: " << score_train << std::endl;
            std::cout << "Score Test : " << score_test << std::endl;
            std::cout << "-------------------------------------------------------------------------------" << std::endl;
        }
        std::cout << "Nodes: " << nodes / nFolds << std::endl;
        std::cout << "**********************************************************************************" << std::endl;
        std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
        std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
 }
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -26,7 +26,7 @@ add_executable(
    reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
    results/Result.cpp
 )
-target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
+target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
 # b_grid
 set(grid_sources GridSearch.cpp GridData.cpp)
@@ -35,7 +35,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
    common/Datasets.cpp common/Dataset.cpp
    main/HyperParameters.cpp main/Models.cpp 
 )
-target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
 # b_list
 add_executable(b_list commands/b_list.cpp
@@ -44,7 +44,7 @@ add_executable(b_list commands/b_list.cpp
    reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
    results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
 )
-target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
+target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
 # b_main
 set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
@@ -54,7 +54,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
    reports/ReportConsole.cpp reports/ReportBase.cpp 
    results/Result.cpp
 )
-target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
 # b_manage
 set(manage_sources ManageScreen.cpp CommandParser.cpp ResultsManager.cpp)
@@ -66,4 +66,4 @@ add_executable(
    results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
    main/Scores.cpp
 )
-target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp "${BayesNet}")
+target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" mdlp "${BayesNet}")
--- a/src/common/Dataset.cpp
+++ b/src/common/Dataset.cpp
@@ -1,4 +1,4 @@
-#include <ArffFiles.h>
+#include <ArffFiles.hpp>
 #include <fstream>
 #include "Dataset.h"
 namespace platform {