diff --git a/.vscode/launch.json b/.vscode/launch.json index c1275e6..cade330 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -46,6 +46,14 @@ ], "cwd": "/Users/rmontanana/Code/discretizbench", }, + { + "type": "lldb", + "request": "launch", + "name": "list", + "program": "${workspaceFolder}/build/src/Platform/list", + "args": [], + "cwd": "/Users/rmontanana/Code/discretizbench", + }, { "name": "Build & debug active file", "type": "cppdbg", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 5d92a8f..45cc63d 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -32,6 +32,29 @@ ], "group": "build", "detail": "Task generated by Debugger." + }, + { + "type": "cppbuild", + "label": "C/C++: g++ build active file", + "command": "/usr/bin/g++", + "args": [ + "-fdiagnostics-color=always", + "-g", + "${file}", + "-o", + "${fileDirname}/${fileBasenameNoExtension}" + ], + "options": { + "cwd": "${fileDirname}" + }, + "problemMatcher": [ + "$gcc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "detail": "Task generated by Debugger." } ] } \ No newline at end of file diff --git a/Makefile b/Makefile index 3e097b7..b883892 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png build: ## Build the main and BayesNetSample - cmake --build build -t main -t BayesNetSample -t manage -j 32 + cmake --build build -t main -t BayesNetSample -t manage -t list -j 32 clean: ## Clean the debug info @echo ">>> Cleaning Debug BayesNet ..."; @@ -35,7 +35,7 @@ release: ## Build a Release version of the project @if [ -d ./build ]; then rm -rf ./build; fi @mkdir build; cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \ - cmake --build build -t main -t BayesNetSample -t manage -j 32; + cmake --build build -t main -t BayesNetSample -t manage -t list -j 32; @echo ">>> Done"; test: ## Run tests diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 0eb26ce..78c6615 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -6,5 +6,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc Report.cc) add_executable(manage manage.cc Results.cc Report.cc) +add_executable(list list.cc platformUtils Datasets.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") -target_link_libraries(manage "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(manage "${TORCH_LIBRARIES}") +target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc index 6756148..b187be8 100644 --- a/src/Platform/Datasets.cc +++ b/src/Platform/Datasets.cc @@ -24,75 +24,110 @@ namespace platform { transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); return result; } - vector Datasets::getFeatures(string name) + vector Datasets::getFeatures(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getFeatures(); + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getFeatures(); } else { throw invalid_argument("Dataset not loaded."); } } - map> Datasets::getStates(string name) + map> Datasets::getStates(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getStates(); + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getStates(); } else { throw invalid_argument("Dataset not loaded."); } } - string Datasets::getClassName(string name) + void Datasets::loadDataset(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getClassName(); + if (datasets.at(name)->isLoaded()) { + return; + } else { + datasets.at(name)->load(); + } + } + string Datasets::getClassName(const string& name) const + { + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getClassName(); } else { throw invalid_argument("Dataset not loaded."); } } - int Datasets::getNSamples(string name) + int Datasets::getNSamples(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getNSamples(); + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getNSamples(); } else { throw invalid_argument("Dataset not loaded."); } } - pair>&, vector&> Datasets::getVectors(string name) + int Datasets::getNClasses(const string& name) + { + if (datasets.at(name)->isLoaded()) { + auto className = datasets.at(name)->getClassName(); + if (discretize) { + auto states = getStates(name); + return states.at(className).size(); + } + auto [Xv, yv] = getVectors(name); + return *max_element(yv.begin(), yv.end()) + 1; + } else { + throw invalid_argument("Dataset not loaded."); + } + } + vector Datasets::getClassesCounts(const string& name) const + { + if (datasets.at(name)->isLoaded()) { + auto [Xv, yv] = datasets.at(name)->getVectors(); + vector counts(*max_element(yv.begin(), yv.end()) + 1); + for (auto y : yv) { + counts[y]++; + } + return counts; + } else { + throw invalid_argument("Dataset not loaded."); + } + } + pair>&, vector&> Datasets::getVectors(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectors(); } - pair>&, vector&> Datasets::getVectorsDiscretized(string name) + pair>&, vector&> Datasets::getVectorsDiscretized(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectorsDiscretized(); } - pair Datasets::getTensors(string name) + pair Datasets::getTensors(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getTensors(); } - bool Datasets::isDataset(const string& name) + bool Datasets::isDataset(const string& name) const { return datasets.find(name) != datasets.end(); } Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) { } - string Dataset::getName() + string Dataset::getName() const { return name; } - string Dataset::getClassName() + string Dataset::getClassName() const { return className; } - vector Dataset::getFeatures() + vector Dataset::getFeatures() const { if (loaded) { return features; @@ -100,7 +135,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - int Dataset::getNFeatures() + int Dataset::getNFeatures() const { if (loaded) { return n_features; @@ -108,7 +143,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - int Dataset::getNSamples() + int Dataset::getNSamples() const { if (loaded) { return n_samples; @@ -116,7 +151,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - map> Dataset::getStates() + map> Dataset::getStates() const { if (loaded) { return states; diff --git a/src/Platform/Datasets.h b/src/Platform/Datasets.h index 4ccd1f0..a99c86e 100644 --- a/src/Platform/Datasets.h +++ b/src/Platform/Datasets.h @@ -29,15 +29,15 @@ namespace platform { public: Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; explicit Dataset(const Dataset&); - string getName(); - string getClassName(); - vector getFeatures(); - map> getStates(); + string getName() const; + string getClassName() const; + vector getFeatures() const; + map> getStates() const; pair>&, vector&> getVectors(); pair>&, vector&> getVectorsDiscretized(); pair getTensors(); - int getNFeatures(); - int getNSamples(); + int getNFeatures() const; + int getNSamples() const; void load(); const bool inline isLoaded() const { return loaded; }; }; @@ -51,14 +51,17 @@ namespace platform { public: explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; vector getNames(); - vector getFeatures(string name); - int getNSamples(string name); - string getClassName(string name); - map> getStates(string name); - pair>&, vector&> getVectors(string name); - pair>&, vector&> getVectorsDiscretized(string name); - pair getTensors(string name); - bool isDataset(const string& name); + vector getFeatures(const string& name) const; + int getNSamples(const string& name) const; + string getClassName(const string& name) const; + int getNClasses(const string& name); + vector getClassesCounts(const string& name) const; + map> getStates(const string& name) const; + pair>&, vector&> getVectors(const string& name); + pair>&, vector&> getVectorsDiscretized(const string& name); + pair getTensors(const string& name); + bool isDataset(const string& name) const; + void loadDataset(const string& name) const; }; }; diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h index 756e61a..fdda25a 100644 --- a/src/Platform/Paths.h +++ b/src/Platform/Paths.h @@ -1,5 +1,6 @@ #ifndef PATHS_H #define PATHS_H +#include namespace platform { class Paths { public: diff --git a/src/Platform/Report.cc b/src/Platform/Report.cc index a40a482..cc3b0a0 100644 --- a/src/Platform/Report.cc +++ b/src/Platform/Report.cc @@ -1,6 +1,9 @@ +#include +#include #include "Report.h" #include "BestResult.h" + namespace platform { string headerLine(const string& text) { @@ -31,21 +34,31 @@ namespace platform { body(); footer(); } + struct separated : numpunct { + char do_decimal_point() const { return ','; } + char do_thousands_sep() const { return '.'; } + string do_grouping() const { return "\03"; } + }; void Report::header() { + locale mylocale(cout.getloc(), new separated); + locale::global(mylocale); + cout.imbue(mylocale); + stringstream oss; cout << Colors::MAGENTA() << string(MAXL, '*') << endl; cout << headerLine("Report " + data["model"].get() + " ver. " + data["version"].get() + " with " + to_string(data["folds"].get()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get() + " " + data["time"].get()); cout << headerLine(data["title"].get()); cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get() ? "True" : "False")); - cout << headerLine("Execution took " + to_string(data["duration"].get()) + " seconds, " + to_string(data["duration"].get() / 3600) + " hours, on " + data["platform"].get()); + oss << "Execution took " << setprecision(2) << fixed << data["duration"].get() << " seconds, " << data["duration"].get() / 3600 << " hours, on " << data["platform"].get(); + cout << headerLine(oss.str()); cout << headerLine("Score is " + data["score_name"].get()); cout << string(MAXL, '*') << endl; cout << endl; } void Report::body() { - cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; - cout << "============================== ====== ===== === ======= ======= ======= =============== ================== ===============" << endl; + cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; + cout << "============================== ====== ===== === ========= ========= ========= =============== ================== ===============" << endl; json lastResult; totalScore = 0; bool odd = true; @@ -55,9 +68,9 @@ namespace platform { cout << setw(6) << right << r["samples"].get() << " "; cout << setw(5) << right << r["features"].get() << " "; cout << setw(3) << right << r["classes"].get() << " "; - cout << setw(7) << setprecision(2) << fixed << r["nodes"].get() << " "; - cout << setw(7) << setprecision(2) << fixed << r["leaves"].get() << " "; - cout << setw(7) << setprecision(2) << fixed << r["depth"].get() << " "; + cout << setw(9) << setprecision(2) << fixed << r["nodes"].get() << " "; + cout << setw(9) << setprecision(2) << fixed << r["leaves"].get() << " "; + cout << setw(9) << setprecision(2) << fixed << r["depth"].get() << " "; cout << setw(8) << right << setprecision(6) << fixed << r["score"].get() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get() << " "; cout << setw(11) << right << setprecision(6) << fixed << r["time"].get() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get() << " "; try { @@ -85,7 +98,9 @@ namespace platform { cout << Colors::MAGENTA() << string(MAXL, '*') << endl; auto score = data["score_name"].get(); if (score == BestResult::scoreName()) { - cout << headerLine(score + " compared to " + BestResult::title() + " .: " + to_string(totalScore / BestResult::score())); + stringstream oss; + oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score(); + cout << headerLine(oss.str()); } cout << string(MAXL, '*') << endl << Colors::RESET(); diff --git a/src/Platform/list.cc b/src/Platform/list.cc new file mode 100644 index 0000000..ed8396d --- /dev/null +++ b/src/Platform/list.cc @@ -0,0 +1,57 @@ +#include +#include +#include "Paths.h" +#include "Colors.h" +#include "Datasets.h" + +using namespace std; +const int BALANCE_LENGTH = 75; + +struct separated : numpunct { + char do_decimal_point() const { return ','; } + char do_thousands_sep() const { return '.'; } + string do_grouping() const { return "\03"; } +}; + +void outputBalance(const string& balance) +{ + auto temp = string(balance); + while (temp.size() > BALANCE_LENGTH - 1) { + auto part = temp.substr(0, BALANCE_LENGTH); + cout << part << endl; + cout << setw(48) << " "; + temp = temp.substr(BALANCE_LENGTH); + } + cout << temp << endl; +} + +int main(int argc, char** argv) +{ + auto data = platform::Datasets(platform::Paths().datasets(), false); + locale mylocale(cout.getloc(), new separated); + locale::global(mylocale); + cout.imbue(mylocale); + cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl; + string balanceBars = string(BALANCE_LENGTH, '='); + cout << "============================== ====== ===== === " << balanceBars << endl; + bool odd = true; + for (const auto& dataset : data.getNames()) { + auto color = odd ? Colors::CYAN() : Colors::BLUE(); + cout << color << setw(30) << left << dataset << " "; + data.loadDataset(dataset); + auto nSamples = data.getNSamples(dataset); + cout << setw(6) << right << nSamples << " "; + cout << setw(5) << right << data.getFeatures(dataset).size() << " "; + cout << setw(3) << right << data.getNClasses(dataset) << " "; + stringstream oss; + string sep = ""; + for (auto number : data.getClassesCounts(dataset)) { + oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; + sep = " / "; + } + outputBalance(oss.str()); + odd = !odd; + } + cout << Colors::RESET() << endl; + return 0; +}