Add numeric features management to Dataset

This commit is contained in:
2024-06-06 13:03:57 +02:00
parent 6858b3d89a
commit a7ec930fa0
15 changed files with 210 additions and 43 deletions

View File

@@ -1,3 +1,4 @@
#include <algorithm>
#include "common/Colors.h"
#include "common/Datasets.h"
#include "common/Paths.h"
@@ -12,7 +13,7 @@ namespace platform {
auto part = temp.substr(0, DatasetsConsole::BALANCE_LENGTH);
line += part + "\n";
body.push_back(line);
line = string(name_len + 22, ' ');
line = string(name_len + 28, ' ');
temp = temp.substr(DatasetsConsole::BALANCE_LENGTH);
}
line += temp + "\n";
@@ -26,8 +27,8 @@ namespace platform {
std::stringstream sheader;
auto datasets_names = datasets.getNames();
int maxName = std::max(size_t(7), (*max_element(datasets_names.begin(), datasets_names.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size());
std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "Cls", "Balance" };
std::vector<int> header_lengths = { 3, maxName, 6, 5, 3, DatasetsConsole::BALANCE_LENGTH };
std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "#Num.", "Cls", "Balance" };
std::vector<int> header_lengths = { 3, maxName, 6, 5, 5, 3, DatasetsConsole::BALANCE_LENGTH };
sheader << Colors::GREEN();
for (int i = 0; i < header_labels.size(); i++) {
sheader << setw(header_lengths[i]) << left << header_labels[i] << " ";
@@ -50,7 +51,11 @@ namespace platform {
datasets.loadDataset(dataset);
auto nSamples = datasets.getNSamples(dataset);
line << setw(6) << right << nSamples << " ";
line << setw(5) << right << datasets.getFeatures(dataset).size() << " ";
auto nFeatures = datasets.getFeatures(dataset).size();
line << setw(5) << right << nFeatures << " ";
auto numericFeatures = datasets.getNumericFeatures(dataset);
auto num = std::count(numericFeatures.begin(), numericFeatures.end(), true);
line << setw(5) << right << num << " ";
line << setw(3) << right << datasets.getNClasses(dataset) << " ";
std::string sep = "";
oss.str("");
@@ -63,6 +68,7 @@ namespace platform {
data[dataset] = json::object();
data[dataset]["samples"] = nSamples;
data[dataset]["features"] = datasets.getFeatures(dataset).size();
data[dataset]["numericFeatures"] = num;
data[dataset]["classes"] = datasets.getNClasses(dataset);
data[dataset]["balance"] = oss.str();
}

View File

@@ -17,11 +17,11 @@ namespace platform {
int balanceSize = 75; // Min size of the column
worksheet = workbook_add_worksheet(workbook, "Datasets");
// Header
worksheet_merge_range(worksheet, 0, 0, 0, 5, "Datasets", styles["headerFirst"]);
worksheet_merge_range(worksheet, 0, 0, 0, 6, "Datasets", styles["headerFirst"]);
// Body header
row = 2;
int col = 0;
for (const auto& name : { "", "Dataset", "Samples", "Features", "Classes", "Balance" }) {
for (const auto& name : { "", "Dataset", "Samples", "Features", "#Numer.", "Classes", "Balance" }) {
writeString(row, col++, name, "bodyHeader");
}
// Body
@@ -34,12 +34,13 @@ namespace platform {
writeString(row, 1, key.c_str(), "text");
writeInt(row, 2, value["samples"], "ints");
writeInt(row, 3, value["features"], "ints");
writeInt(row, 4, value["classes"], "ints");
writeString(row, 5, value["balance"].get<std::string>().c_str(), "text");
writeInt(row, 4, value["numericFeatures"], "ints");
writeInt(row, 5, value["classes"], "ints");
writeString(row, 6, value["balance"].get<std::string>().c_str(), "text");
}
// Format columns
worksheet_freeze_panes(worksheet, 3, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize, 10, 10, 10, balanceSize };
std::vector<int> columns_sizes = { 5, datasetNameSize, 10, 10, 10, 10, balanceSize };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}