Fix discretize only numerics in tests

This commit is contained in:
2025-08-21 12:56:41 +02:00
parent 8578d68c57
commit f1a2349245
7 changed files with 49075 additions and 13 deletions

View File

@@ -28,6 +28,9 @@ public:
std::vector<string> features;
std::string className;
map<std::string, std::vector<int>> states;
// catalog maps each dataset name to the indices of its numeric features:
// a value of -1 means all features are numeric, and an empty vector means none are numeric.
map<std::string, std::vector<int>> catalog;
int nSamples, classNumStates;
double epsilon = 1e-5;
bool discretize;
@@ -65,8 +68,30 @@ private:
+ "classNumStates: " + std::to_string(classNumStates) + "\n"
+ "states: " + states_ + "\n";
}
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X);
/// Returns a copy of `str` with leading and trailing whitespace removed.
/// Whitespace is whatever std::isspace reports for the current C locale.
/// A string that is empty or all whitespace yields an empty string.
std::string trim(const std::string& str)
{
    // std::isspace has undefined behaviour for negative arguments other than EOF,
    // so each character must be seen as unsigned char (matters for non-ASCII bytes
    // on platforms where plain char is signed).
    const auto notSpace = [](unsigned char ch) { return !std::isspace(ch); };
    const auto first = std::find_if(str.begin(), str.end(), notSpace);
    // Search backwards, but never past `first`, so an all-whitespace input
    // collapses to the empty range [end, end).
    const auto last = std::find_if(str.rbegin(), std::string::const_reverse_iterator(first), notSpace).base();
    return std::string(first, last);
}
// Breaks `text` into pieces at every occurrence of `delimiter` and returns
// them in order, each piece passed through trim() before being stored.
std::vector<std::string> split(const std::string& text, char delimiter)
{
    std::stringstream stream(text);
    std::vector<std::string> pieces;
    for (std::string piece; std::getline(stream, piece, delimiter);) {
        pieces.push_back(trim(piece));
    }
    return pieces;
}
// Discretizes the samples in X (definition not visible here).
// NOTE(review): is_numeric presumably holds one flag per feature telling whether
// that feature is numeric and should be discretized -- confirm against the definition.
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X, const std::vector<bool>& is_numeric);
// Loads the dataset identified by `name`.
// NOTE(review): class_last presumably indicates that the class column is the
// last column of the file -- confirm against the definition.
void loadDataset(const std::string& name, bool class_last);
// Returns a map from std::string keys to integer vectors.
// NOTE(review): presumably this builds the dataset-name -> numeric-feature-indices
// mapping stored in `catalog` -- confirm against the definition.
map<std::string, std::vector<int>> loadCatalog();
};
#endif //TEST_UTILS_H