Add numeric features management to Dataset

This commit is contained in:
2024-06-06 13:03:57 +02:00
parent 6858b3d89a
commit a7ec930fa0
15 changed files with 210 additions and 43 deletions

View File

@@ -4,14 +4,17 @@
#include <map>
#include <string>
#include <utility>
#include <vector>
#include <CPPFImdlp.h>
#include <common/DiscretizationRegister.h>
#include "Utils.h"
#include "SourceData.h"
namespace platform {
class Dataset {
public:
// Builds a not-yet-loaded dataset descriptor from its location, names and options.
// Only stores the arguments and clears the `loaded` flag; numericFeaturesIdx is
// not mentioned here, so it is default-constructed (empty).
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType)
    : path(path),
      name(name),
      className(className),
      discretize(discretize),
      loaded(false),
      fileType(fileType)
{
}
// Builds a not-yet-loaded dataset descriptor and additionally records the
// caller-supplied numeric feature index list.
//   path, name, className : copied into the members of the same name.
//   discretize, fileType  : stored as given.
//   numericFeaturesIdx    : taken by value and moved into the member, so an
//                           rvalue argument is never copied and an lvalue is
//                           copied exactly once (presumably the positions of
//                           the numeric features -- confirm against load()).
// The `loaded` flag starts as false.
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType, std::vector<int> numericFeaturesIdx) :
    path(path), name(name), className(className), discretize(discretize),
    loaded(false), fileType(fileType), numericFeaturesIdx(std::move(numericFeaturesIdx))
{
}
// Copy constructor, defined out of line. Because it is `explicit`, only
// direct-initialization (`Dataset b(a);`) is accepted; copy-initialization
// (e.g. `Dataset b = a;` or passing an existing Dataset by value) is rejected.
explicit Dataset(const Dataset&);
// String accessors, defined out of line; each returns a copy.
// NOTE(review): presumably the `name` and `className` members -- confirm in the implementation file.
std::string getName() const;
std::string getClassName() const;
@@ -20,9 +23,11 @@ namespace platform {
// Defined out of line. Returns a string-keyed map of integer vectors by value,
// so the caller receives its own copy.
// NOTE(review): presumably a copy of the `states` member -- confirm in the implementation file.
std::map<std::string, std::vector<int>> getStates() const;
// Vector accessors, defined out of line. Each returns a pair of *references*
// (feature matrix, label vector), so the result must not outlive the object
// that owns the referenced containers.
// `std::vector` is spelled out in full so this header does not depend on a
// using-declaration/directive leaking in from another include.
// NOTE(review): presumably these refer to Xv/yv and Xd/yv respectively -- confirm in the implementation file.
std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors();
std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
// Tensor accessors, defined out of line. Each returns a pair of references to
// tensors, so the result must not outlive the object owning those tensors.
// NOTE(review): presumably X_train/X_test for the first and X/y for the second -- confirm in the implementation file.
std::pair<torch::Tensor&, torch::Tensor&> getDiscretizedTrainTestTensors();
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
// Size accessors, defined out of line.
// NOTE(review): presumably return n_features and n_samples -- confirm in the implementation file.
int getNFeatures() const;
int getNSamples() const;
// Gives mutable access to the per-feature flags (true if feature is numeric).
// The reference points at the member itself, so changes made by the caller
// are visible to this object.
std::vector<bool>& getNumericFeatures()
{
    return numericFeatures;
}
void load();
const bool inline isLoaded() const { return loaded; };
private:
@@ -31,12 +36,15 @@ namespace platform {
// File type tag supplied to the constructor.
fileType_t fileType;
// className string supplied to the constructor
// (presumably the name of the class/target variable -- confirm).
std::string className;
// Sample and feature counts; both start at 0.
int n_samples{ 0 }, n_features{ 0 };
// Index list supplied to the six-argument constructor; empty when the
// five-argument constructor is used.
std::vector<int> numericFeaturesIdx;
std::vector<bool> numericFeatures; // true if feature is numeric
// NOTE(review): features/labels/states are not set by the visible constructors;
// presumably filled while loading -- confirm in the implementation file.
std::vector<std::string> features;
std::vector<std::string> labels;
std::map<std::string, std::vector<int>> states;
// Set to false by the constructors; reported by isLoaded().
bool loaded;
// Discretization flag supplied to the constructor.
bool discretize;
// Tensor storage.
// NOTE(review): presumably the objects referenced by getTensors() and
// getDiscretizedTrainTestTensors() -- confirm in the implementation file.
torch::Tensor X, y;
torch::Tensor X_train, X_test;
// Vector storage: float-valued rows, int-valued rows and an int vector.
// NOTE(review): presumably raw features, discretized features and labels,
// as exposed by getVectors()/getVectorsDiscretized() -- confirm.
std::vector<std::vector<float>> Xv;
std::vector<std::vector<int>> Xd;
std::vector<int> yv;