Add numeric features management to Dataset

2024-06-06 13:03:57 +02:00
parent 6858b3d89a
commit a7ec930fa0
15 changed files with 210 additions and 43 deletions
--- a/src/common/Dataset.h
+++ b/src/common/Dataset.h
@@ -4,14 +4,17 @@
 #include <map>
 #include <vector>
 #include <string>
-#include <CPPFImdlp.h>
+#include <common/DiscretizationRegister.h>
 #include "Utils.h"
 #include "SourceData.h"
 namespace platform {
-
    class Dataset {
    public:
-        Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
+        // Stores the dataset description and marks it as not loaded; numericFeaturesIdx is taken by const reference (like the string arguments) and copied once into the member.
+        Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType, const std::vector<int>& numericFeaturesIdx) :
+            path(path), name(name), className(className), discretize(discretize),
+            loaded(false), fileType(fileType), numericFeaturesIdx(numericFeaturesIdx)
+        {}
        explicit Dataset(const Dataset&);
        std::string getName() const;
        std::string getClassName() const;
@@ -20,9 +23,11 @@ namespace platform {
        std::map<std::string, std::vector<int>> getStates() const;
        std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
        std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
+        std::pair<torch::Tensor&, torch::Tensor&> getDiscretizedTrainTestTensors(); // NOTE(review): presumably yields references to X_train / X_test — confirm in the implementation
        std::pair<torch::Tensor&, torch::Tensor&> getTensors();
        int getNFeatures() const;
        int getNSamples() const;
+        std::vector<bool>& getNumericFeatures() { return numericFeatures; } // mutable reference to the per-feature flags (true = numeric)
        void load();
        const bool inline isLoaded() const { return loaded; };
    private:
@@ -31,12 +36,15 @@ namespace platform {
        fileType_t fileType;
        std::string className;
        int n_samples{ 0 }, n_features{ 0 };
+        std::vector<int> numericFeaturesIdx; // copied from the constructor argument; presumably indices of the numeric features — TODO confirm
+        std::vector<bool> numericFeatures; // true if feature is numeric
        std::vector<std::string> features;
        std::vector<std::string> labels;
        std::map<std::string, std::vector<int>> states;
        bool loaded;
        bool discretize;
        torch::Tensor X, y;
+        torch::Tensor X_train, X_test; // NOTE(review): presumably the tensors exposed by getDiscretizedTrainTestTensors() — confirm in Dataset.cpp
        std::vector<std::vector<float>> Xv;
        std::vector<std::vector<int>> Xd;
        std::vector<int> yv;