From 64fc97b892ed8d15b8d31a3218ede67d3c6d8919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 19 Oct 2023 09:57:04 +0200 Subject: [PATCH] Rename utilities sources to match final names --- mac_mst.txt | 33 ---- src/Platform/CMakeLists.txt | 12 +- src/Platform/{best.cc => b_best.cc} | 0 src/Platform/{list.cc => b_list.cc} | 0 src/Platform/{main.cc => b_main.cc} | 0 src/Platform/{manage.cc => b_manage.cc} | 0 src/Platform/testx.cpp | 248 ------------------------ 7 files changed, 5 insertions(+), 288 deletions(-) delete mode 100644 mac_mst.txt rename src/Platform/{best.cc => b_best.cc} (100%) rename src/Platform/{list.cc => b_list.cc} (100%) rename src/Platform/{main.cc => b_main.cc} (100%) rename src/Platform/{manage.cc => b_manage.cc} (100%) delete mode 100644 src/Platform/testx.cpp diff --git a/mac_mst.txt b/mac_mst.txt deleted file mode 100644 index 7327f7b..0000000 --- a/mac_mst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Weights matrix: - 0.0000000, 0.0384968, 0.0795434, 0.1546867, -0.0000000, 0.1788104, 0.2214721, 0.0323837, 0.0366549, - 0.0384968, 0.0000000, 0.0200662, 0.0200937, -0.0000000, 0.0637224, 0.0183005, 0.0127657, 0.0136054, - 0.0795434, 0.0200662, 0.0000000, 0.0605489, -0.0000000, 0.0894469, 0.1689408, 0.0321602, 0.0223184, - 0.1546867, 0.0200937, 0.0605489, 0.0000000, -0.0000000, 0.1150757, 0.1332292, 0.0422865, 0.0191138, --0.0000000, -0.0000000, -0.0000000, -0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, - 0.1788104, 0.0637224, 0.0894469, 0.1150757, 0.0000000, 0.0000000, 0.1407102, 0.0406590, 0.0366986, - 0.2214721, 0.0183005, 0.1689408, 0.1332292, 0.0000000, 0.1407102, 0.0000000, 0.0427515, 0.0349965, - 0.0323837, 0.0127657, 0.0321602, 0.0422865, 0.0000000, 0.0406590, 0.0427515, 0.0000000, 0.0343376, - 0.0366549, 0.0136054, 0.0223184, 0.0191138, 0.0000000, 0.0366986, 0.0349965, 0.0343376, 0.0000000, -Edge : Weight -0 - 6 : 0.2214721 -0 - 5 : 0.1788104 -2 - 6 : 0.1689408 -0 - 3 : 0.1546867 -1 - 5 : 0.0637224 -6 - 7 : 0.0427515 -5 - 8 : 0.0366986 -4 - 5 : 0.0000000 -------------------------------------------------------------------------------- -Metrics Test - Test Maximum Spanning Tree -------------------------------------------------------------------------------- -/Users/rmontanana/Code/BayesNet/tests/TestBayesMetrics.cc:58 -............................................................................... - -/Users/rmontanana/Code/BayesNet/tests/TestBayesMetrics.cc:69: PASSED: - REQUIRE( result == resultsMST.at(file_name) ) -with expansion: - (0, 6) (0, 5) (0, 3) (5, 1) (5, 8) (5, 4) (6, 2) (6, 7) - == - (0, 6) (0, 5) (0, 3) (5, 1) (5, 8) (5, 4) (6, 2) (6, 7) - diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 3a565e1..a253283 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -5,13 +5,11 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) -add_executable(b_main main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc) -add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) -add_executable(b_list list.cc Datasets.cc Dataset.cc) -add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc) -add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc ) +add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc) +add_executable(b_manage b_manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) +add_executable(b_list b_list.cc Datasets.cc Dataset.cc) +add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc) target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}") -target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") -target_link_libraries(testx ArffFiles BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/best.cc b/src/Platform/b_best.cc similarity index 100% rename from src/Platform/best.cc rename to src/Platform/b_best.cc diff --git a/src/Platform/list.cc b/src/Platform/b_list.cc similarity index 100% rename from src/Platform/list.cc rename to src/Platform/b_list.cc diff --git a/src/Platform/main.cc b/src/Platform/b_main.cc similarity index 100% rename from src/Platform/main.cc rename to src/Platform/b_main.cc diff --git a/src/Platform/manage.cc b/src/Platform/b_manage.cc similarity index 100% rename from src/Platform/manage.cc rename to src/Platform/b_manage.cc diff --git a/src/Platform/testx.cpp b/src/Platform/testx.cpp deleted file mode 100644 index dfd6a21..0000000 --- a/src/Platform/testx.cpp +++ /dev/null @@ -1,248 +0,0 @@ -#include "Folding.h" -#include -#include "nlohmann/json.hpp" -#include "map" -#include -#include -#include "Datasets.h" -#include "Network.h" -#include "ArffFiles.h" -#include "CPPFImdlp.h" -#include "CFS.h" -#include "IWSS.h" -#include "FCBF.h" - -using namespace std; -using namespace platform; -using namespace torch; - -string counts(vector y, vector indices) -{ - auto result = map(); - stringstream oss; - for (auto i = 0; i < indices.size(); ++i) { - result[y[indices[i]]]++; - } - string final_result = ""; - for (auto i = 0; i < result.size(); ++i) - oss << i << " -> " << setprecision(2) << fixed - << (double)result[i] * 100 / indices.size() << "% (" << result[i] << ") //"; - oss << endl; - return oss.str(); -} -class Paths { -public: - static string datasets() - { - return "datasets/"; - } -}; - -pair, map> discretize(vector& X, mdlp::labels_t& y, vector features) -{ - vector Xd; - map maxes; - auto fimdlp = mdlp::CPPFImdlp(); - for (int i = 0; i < X.size(); i++) { - fimdlp.fit(X[i], y); - mdlp::labels_t& xd = fimdlp.transform(X[i]); - maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1; - Xd.push_back(xd); - } - return { Xd, maxes }; -} - -vector discretizeDataset(vector& X, mdlp::labels_t& y) -{ - vector Xd; - auto fimdlp = mdlp::CPPFImdlp(); - for (int i = 0; i < X.size(); i++) { - fimdlp.fit(X[i], y); - mdlp::labels_t& xd = fimdlp.transform(X[i]); - Xd.push_back(xd); - } - return Xd; -} - -bool file_exists(const string& name) -{ - if (FILE* file = fopen(name.c_str(), "r")) { - fclose(file); - return true; - } else { - return false; - } -} - -tuple, string, map>> loadDataset(const string& name, bool class_last, bool discretize_dataset) -{ - auto handler = ArffFiles(); - handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); - // Get Dataset X, y - vector& X = handler.getX(); - mdlp::labels_t& y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); - Tensor Xd; - auto states = map>(); - if (discretize_dataset) { - auto Xr = discretizeDataset(X, y); - Xd = torch::zeros({ static_cast(Xr.size()), static_cast(Xr[0].size()) }, torch::kInt32); - for (int i = 0; i < features.size(); ++i) { - states[features[i]] = vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); - auto item = states.at(features[i]); - iota(begin(item), end(item), 0); - Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32)); - } - states[className] = vector(*max_element(y.begin(), y.end()) + 1); - iota(begin(states.at(className)), end(states.at(className)), 0); - } else { - Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); - for (int i = 0; i < features.size(); ++i) { - Xd.index_put_({ i, "..." }, torch::tensor(X[i])); - } - } - return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; -} - -tuple>, vector, vector, string, map>> loadFile(const string& name) -{ - auto handler = ArffFiles(); - handler.load(Paths::datasets() + static_cast(name) + ".arff"); - // Get Dataset X, y - vector& X = handler.getX(); - mdlp::labels_t& y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); - // Discretize Dataset - vector Xd; - map maxes; - tie(Xd, maxes) = discretize(X, y, features); - maxes[className] = *max_element(y.begin(), y.end()) + 1; - map> states; - for (auto feature : features) { - states[feature] = vector(maxes[feature]); - } - states[className] = vector(maxes[className]); - return { Xd, y, features, className, states }; -} -class RawDatasets { -public: - RawDatasets(const string& file_name, bool discretize) - { - // Xt can be either discretized or not - tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize); - // Xv is always discretized - tie(Xv, yv, featuresv, classNamev, statesv) = loadFile(file_name); - auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1); - dataset = torch::cat({ Xt, yresized }, 0); - nSamples = dataset.size(1); - weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble); - weightsv = vector(nSamples, 1.0 / nSamples); - classNumStates = discretize ? statest.at(classNamet).size() : 0; - } - torch::Tensor Xt, yt, dataset, weights; - vector> Xv; - vector weightsv; - vector yv; - vector featurest, featuresv; - map> statest, statesv; - string classNamet, classNamev; - int nSamples, classNumStates; - double epsilon = 1e-5; -}; -int main() -{ - // map balance = { - // {"iris", "33,33% (50) / 33,33% (50) / 33,33% (50)"}, - // {"diabetes", "34,90% (268) / 65,10% (500)"}, - // {"ecoli", "42,56% (143) / 22,92% (77) / 0,60% (2) / 0,60% (2) / 10,42% (35) / 5,95% (20) / 1,49% (5) / 15,48% (52)"}, - // {"glass", "32,71% (70) / 7,94% (17) / 4,21% (9) / 35,51% (76) / 13,55% (29) / 6,07% (13)"} - // }; - // for (const auto& file_name : { "iris", "glass", "ecoli", "diabetes" }) { - // auto dt = Datasets(true, "Arff"); - // auto [X, y] = dt.getVectors(file_name); - // //auto fold = KFold(5, 150); - // auto fold = StratifiedKFold(5, y, -1); - // cout << "***********************************************************************************************" << endl; - // cout << "Dataset: " << file_name << endl; - // cout << "NÂș Samples: " << dt.getNSamples(file_name) << endl; - // cout << "Class states: " << dt.getNClasses(file_name) << endl; - // cout << "Balance: " << balance.at(file_name) << endl; - // for (int i = 0; i < 5; ++i) { - // cout << "Fold: " << i << endl; - // auto [train, test] = fold.getFold(i); - // cout << "Train: "; - // cout << "(" << train.size() << "): "; - // // for (auto j = 0; j < static_cast(train.size()); j++) - // // cout << train[j] << ", "; - // cout << endl; - // cout << "Train Statistics : " << counts(y, train); - // cout << "-------------------------------------------------------------------------------" << endl; - // cout << "Test: "; - // cout << "(" << test.size() << "): "; - // // for (auto j = 0; j < static_cast(test.size()); j++) - // // cout << test[j] << ", "; - // cout << endl; - // cout << "Test Statistics: " << counts(y, test); - // cout << "==============================================================================" << endl; - // } - // cout << "***********************************************************************************************" << endl; - // } - // const string file_name = "iris"; - // auto net = bayesnet::Network(); - // auto dt = Datasets(true, "Arff"); - // auto raw = RawDatasets("iris", true); - // auto [X, y] = dt.getVectors(file_name); - // cout << "Dataset dims " << raw.dataset.sizes() << endl; - // cout << "weights dims " << raw.weights.sizes() << endl; - // cout << "States dims " << raw.statest.size() << endl; - // cout << "features: "; - // for (const auto& feature : raw.featurest) { - // cout << feature << ", "; - // net.addNode(feature); - // } - // net.addNode(raw.classNamet); - // cout << endl; - // net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest); - auto dt = Datasets(true, "Arff"); - nlohmann::json output; - for (const auto& name : dt.getNames()) { - // for (const auto& name : { "iris" }) { - auto [X, y] = dt.getTensors(name); - auto features = dt.getFeatures(name); - auto states = dt.getStates(name); - auto className = dt.getClassName(name); - int maxFeatures = 0; - auto classNumStates = states.at(className).size(); - torch::Tensor weights = torch::full({ X.size(1) }, 1.0 / X.size(1), torch::kDouble); - auto dataset = X; - auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); - dataset = torch::cat({ dataset, yresized }, 0); - auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights); - auto fcbf = bayesnet::FCBF(dataset, features, className, maxFeatures, classNumStates, weights, 1e-7); - auto iwss = bayesnet::IWSS(dataset, features, className, maxFeatures, classNumStates, weights, 0.5); - cout << "Dataset: " << setw(20) << name << flush; - cfs.fit(); - cout << " CFS: " << setw(4) << cfs.getFeatures().size() << flush; - fcbf.fit(); - cout << " FCBF: " << setw(4) << fcbf.getFeatures().size() << flush; - iwss.fit(); - cout << " IWSS: " << setw(4) << iwss.getFeatures().size() << flush; - cout << endl; - output[name]["CFS"] = cfs.getFeatures(); - output[name]["FCBF"] = fcbf.getFeatures(); - output[name]["IWSS"] = iwss.getFeatures(); - } - ofstream file("features_cpp.json"); - file << output; - file.close(); - -} -