From 099b4bea0945f1f99b22716ca06be60af77b9730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 26 Jul 2023 01:39:01 +0200 Subject: [PATCH] Fix some mistakes in tensors treatment --- .vscode/launch.json | 20 +++++--- sample/CMakeLists.txt | 2 +- sample/sample.cc | 96 +++++++++++++++++++++++++++++++++-- src/BayesNet/BayesMetrics.cc | 10 ++-- src/BayesNet/Classifier.cc | 11 ++-- src/BayesNet/Ensemble.cc | 14 ++--- src/BayesNet/Network.cc | 69 ++++++++++++++++++++----- src/BayesNet/Network.h | 1 + src/BayesNet/Node.cc | 8 +++ src/BayesNet/Node.h | 1 + src/BayesNet/TAN.cc | 2 +- src/BayesNet/bayesnetUtils.cc | 2 +- src/Platform/CMakeLists.txt | 3 +- src/Platform/Experiment.cc | 23 +++++---- src/Platform/Folding.cc | 13 ++--- src/Platform/Folding.h | 4 +- src/Platform/platformUtils.cc | 6 +-- src/Platform/testx.cpp | 42 ++++++++++++--- 18 files changed, 255 insertions(+), 72 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index e881ccf..c8b7f7f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,13 +4,19 @@ { "type": "lldb", "request": "launch", - "name": "bayesnet", - "program": "${workspaceFolder}/build/sample/main", + "name": "sample", + "program": "${workspaceFolder}/build/sample/BayesNetSample", "args": [ - "-f", - "iris" + "-d", + "iris", + "-m", + "TAN", + "-p", + "../../data/", + "--stratified", + "--tensors" ], - "cwd": "${workspaceFolder}", + "cwd": "${workspaceFolder}/build/sample/", "preLaunchTask": "CMake: build" }, { @@ -25,7 +31,9 @@ "TAN", "-p", "../../../data/", - "--discretize" + "--discretize", + "-f", + "2" ], "cwd": "${workspaceFolder}/build/src/Platform", }, diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 0953c7a..4f9d087 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -3,5 +3,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) -add_executable(BayesNetSample sample.cc) +add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc) target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/sample/sample.cc b/sample/sample.cc index 3b6de05..613a2fa 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -12,6 +13,7 @@ #include "SPODE.h" #include "AODE.h" #include "TAN.h" +#include "Folding.h" using namespace std; @@ -42,6 +44,21 @@ bool file_exists(const std::string& name) return false; } } +pair>, vector> extract_indices(vector indices, vector> X, vector y) +{ + vector> Xr; + vector yr; + for (int col = 0; col < X.size(); ++col) { + Xr.push_back(vector()); + } + for (auto index : indices) { + for (int col = 0; col < X.size(); ++col) { + Xr[col].push_back(X[col][index]); + } + yr.push_back(y[index]); + } + return { Xr, yr }; +} int main(int argc, char** argv) { @@ -60,7 +77,7 @@ int main(int argc, char** argv) valid_datasets.push_back(dataset.first); } argparse::ArgumentParser program("BayesNetSample"); - program.add_argument("-f", "--file") + program.add_argument("-d", "--dataset") .help("Dataset file name") .action([valid_datasets](const std::string& value) { if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { @@ -83,14 +100,37 @@ int main(int argc, char** argv) throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); } ); - bool class_last; + program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); + program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); + program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); + program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { + try { + auto k = stoi(value); + if (k < 2) { + throw runtime_error("Number of folds must be greater than 1"); + } + return k; + } + catch (const runtime_error& err) { + throw runtime_error(err.what()); + } + catch (...) { + throw runtime_error("Number of folds must be an integer"); + }}); + program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); + bool class_last, stratified, tensors; string model_name, file_name, path, complete_file_name; + int nFolds, seed; try { program.parse_args(argc, argv); - file_name = program.get("file"); + file_name = program.get("dataset"); path = program.get("path"); model_name = program.get("model"); complete_file_name = path + file_name + ".arff"; + stratified = program.get("stratified"); + tensors = program.get("tensors"); + nFolds = program.get("folds"); + seed = program.get("seed"); class_last = datasets[file_name]; if (!file_exists(complete_file_name)) { throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); @@ -144,5 +184,55 @@ int main(int argc, char** argv) file.close(); cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; + string stratified_string = stratified ? " Stratified" : ""; + cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; + cout << "==========================================" << endl; + torch::Tensor Xt = torch::zeros({ static_cast(Xd.size()), static_cast(Xd[0].size()) }, torch::kInt32); + torch::Tensor yt = torch::tensor(y, torch::kInt32); + for (int i = 0; i < features.size(); ++i) { + Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); + } + float total_score = 0, total_score_train = 0, score_train, score_test; + Fold* fold; + if (stratified) + fold = new StratifiedKFold(nFolds, y, seed); + else + fold = new KFold(nFolds, y.size(), seed); + for (auto i = 0; i < nFolds; ++i) { + auto [train, test] = fold->getFold(i); + cout << "Fold: " << i + 1 << endl; + if (tensors) { + cout << "Xt shape: " << Xt.sizes() << endl; + cout << "yt shape: " << yt.sizes() << endl; + auto ttrain = torch::tensor(train, torch::kInt64); + auto ttest = torch::tensor(test, torch::kInt64); + torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); + torch::Tensor ytraint = yt.index({ ttrain }); + torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); + torch::Tensor ytestt = yt.index({ ttest }); + cout << "Train: " << Xtraint.size(0) << " x " << Xtraint.size(1) << " " << ytraint.size(0) << endl; + cout << "Test : " << Xtestt.size(0) << " x " << Xtestt.size(1) << " " << ytestt.size(0) << endl; + clf->fit(Xtraint, ytraint, features, className, states); + score_train = clf->score(Xtraint, ytraint); + score_test = clf->score(Xtestt, ytestt); + } else { + auto [Xtrain, ytrain] = extract_indices(train, Xd, y); + auto [Xtest, ytest] = extract_indices(test, Xd, y); + cout << "Train: " << Xtrain.size() << " x " << Xtrain[0].size() << " " << ytrain.size() << endl; + cout << "Test : " << Xtest.size() << " x " << Xtest[0].size() << " " << ytest.size() << endl; + clf->fit(Xtrain, ytrain, features, className, states); + score_train = clf->score(Xtrain, ytrain); + score_test = clf->score(Xtest, ytest); + } + total_score_train += score_train; + total_score += score_test; + cout << "Score Train: " << score_train << endl; + cout << "Score Test : " << score_test << endl; + // cout << "-------------------------------------------------------------------------------" << endl; + // total_score += score_value; + } + cout << "**********************************************************************************" << endl; + cout << "Average Score Train: " << total_score_train / nFolds << endl; + cout << "Average Score Test : " << total_score / nFolds << endl; return 0; } \ No newline at end of file diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index a5abb6b..8c46582 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -13,11 +13,11 @@ namespace bayesnet { , className(className) , classNumStates(classNumStates) { - samples = torch::zeros({ static_cast(vsamples[0].size()), static_cast(vsamples.size() + 1) }, torch::kInt64); + samples = torch::zeros({ static_cast(vsamples[0].size()), static_cast(vsamples.size() + 1) }, torch::kInt32); for (int i = 0; i < vsamples.size(); ++i) { - samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64)); + samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32)); } - samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64)); + samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32)); } vector> Metrics::doCombinations(const vector& source) { @@ -43,8 +43,8 @@ namespace bayesnet { margin[value] = mask.sum().item() / samples.sizes()[0]; } for (auto [first, second] : combinations) { - int64_t index_first = find(features.begin(), features.end(), first) - features.begin(); - int64_t index_second = find(features.begin(), features.end(), second) - features.begin(); + int index_first = find(features.begin(), features.end(), first) - features.begin(); + int index_second = find(features.begin(), features.end(), second) - features.begin(); double accumulated = 0; for (int value = 0; value < classNumStates; ++value) { auto mask = samples.index({ "...", -1 }) == value; diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index 6d0519a..6fb67d9 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -25,7 +25,7 @@ namespace bayesnet { } Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) { - this->X = X; + this->X = torch::transpose(X, 0, 1); this->y = y; Xv = vector>(); yv = vector(y.data_ptr(), y.data_ptr() + y.size(0)); @@ -34,12 +34,12 @@ namespace bayesnet { Classifier& Classifier::fit(vector>& X, vector& y, vector& features, string className, map>& states) { - this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt64); + this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt32); Xv = X; for (int i = 0; i < X.size(); ++i) { - this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64)); + this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32)); } - this->y = torch::tensor(y, kInt64); + this->y = torch::tensor(y, kInt32); yv = y; return build(features, className, states); } @@ -77,7 +77,7 @@ namespace bayesnet { Xd[i] = vector(temp.data_ptr(), temp.data_ptr() + m_); } auto yp = model.predict(Xd); - auto ypred = torch::tensor(yp, torch::kInt64); + auto ypred = torch::tensor(yp, torch::kInt32); return ypred; } vector Classifier::predict(vector>& X) @@ -121,6 +121,7 @@ namespace bayesnet { } void Classifier::addNodes() { + auto test = model.getEdges(); // Add all nodes to the network for (auto feature : features) { model.addNode(feature, states[feature].size()); diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index 59fdf91..b8c6418 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -32,12 +32,12 @@ namespace bayesnet { } Ensemble& Ensemble::fit(vector>& X, vector& y, vector& features, string className, map>& states) { - this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt64); + this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt32); Xv = X; for (int i = 0; i < X.size(); ++i) { - this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64)); + this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32)); } - this->y = torch::tensor(y, kInt64); + this->y = torch::tensor(y, kInt32); yv = y; return build(features, className, states); } @@ -46,7 +46,7 @@ namespace bayesnet { if (!fitted) { throw logic_error("Ensemble has not been fitted"); } - Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64); + Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt32); for (auto i = 0; i < n_models; ++i) { y_pred.index_put_({ "...", i }, models[i]->predict(X)); } @@ -54,7 +54,7 @@ namespace bayesnet { } vector Ensemble::voting(Tensor& y_pred) { - auto y_pred_ = y_pred.accessor(); + auto y_pred_ = y_pred.accessor(); vector y_pred_final; for (int i = 0; i < y_pred.size(0); ++i) { vector votes(states[className].size(), 0); @@ -77,9 +77,9 @@ namespace bayesnet { for (auto i = 0; i < n_; i++) { Xd[i] = vector(X[i].begin(), X[i].end()); } - Tensor y_pred = torch::zeros({ m_, n_models }, kInt64); + Tensor y_pred = torch::zeros({ m_, n_models }, kInt32); for (auto i = 0; i < n_models; ++i) { - y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64)); + y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32)); } return voting(y_pred); } diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index d7879a2..7bf64d2 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -26,7 +26,8 @@ namespace bayesnet { features.push_back(name); } if (nodes.find(name) != nodes.end()) { - // if node exists update its number of states + // if node exists update its number of states and remove parents, children and CPT + nodes[name]->clear(); nodes[name]->setNumStates(numStates); return; } @@ -88,7 +89,6 @@ namespace bayesnet { nodes[child]->removeParent(nodes[parent].get()); throw invalid_argument("Adding this edge forms a cycle in the graph."); } - } map>& Network::getNodes() { @@ -96,23 +96,71 @@ namespace bayesnet { } void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector& featureNames, const string& className) { - this->fit(tensorToVector(X), vector(y.data_ptr(), y.data_ptr() + y.size(0)), featureNames, className); + features = featureNames; + this->className = className; + dataset.clear(); + classNumStates = torch::max(y).item() + 1; + samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1); + for (int i = 0; i < featureNames.size(); ++i) { + auto column = torch::flatten(X.index({ "...", i })); + auto k = vector(); + for (auto i = 0; i < X.size(0); ++i) { + k.push_back(column[i].item()); + } + dataset[featureNames[i]] = k; + } + dataset[className] = vector(y.data_ptr(), y.data_ptr() + y.size(0)); + // // + // // Check if data is ok + // cout << "******************************************************************" << endl; + // cout << "Check samples, sizes: " << samples.sizes() << endl; + // for (auto i = 0; i < features.size(); ++i) { + // cout << featureNames[i] << ": " << nodes[featureNames[i]]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", i })).item() + 1 << " dataset" << *max_element(dataset[featureNames[i]].begin(), dataset[featureNames[i]].end()) + 1 << endl; + // } + // cout << className << ": " << nodes[className]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", -1 })) + 1 << endl; + // cout << "******************************************************************" << endl; + // // + // // + /* + + + */ + for (int i = 0; i < features.size(); ++i) { + cout << "Checking " << features[i] << endl; + auto column = torch::flatten(X.index({ "...", i })); + auto k = vector(); + for (auto i = 0; i < X.size(0); ++i) { + k.push_back(column[i].item()); + } + if (k != dataset[features[i]]) { + throw invalid_argument("Dataset and samples do not match"); + } + } + /* + + + */ + completeFit(); } void Network::fit(const vector>& input_data, const vector& labels, const vector& featureNames, const string& className) { features = featureNames; this->className = className; dataset.clear(); - // Build dataset & tensor of samples - samples = torch::zeros({ static_cast(input_data[0].size()), static_cast(input_data.size() + 1) }, torch::kInt64); + samples = torch::zeros({ static_cast(input_data[0].size()), static_cast(input_data.size() + 1) }, torch::kInt32); for (int i = 0; i < featureNames.size(); ++i) { dataset[featureNames[i]] = input_data[i]; - samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64)); + samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt32)); } dataset[className] = labels; - samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64)); + samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32)); classNumStates = *max_element(labels.begin(), labels.end()) + 1; + completeFit(); + } + void Network::completeFit() + { + int maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); if (maxThreadsRunning < 1) { maxThreadsRunning = 1; @@ -122,15 +170,12 @@ namespace bayesnet { condition_variable cv; int activeThreads = 0; int nextNodeIndex = 0; - while (nextNodeIndex < nodes.size()) { unique_lock lock(mtx); cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; }); - if (nextNodeIndex >= nodes.size()) { break; // No more work remaining } - threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() { while (true) { unique_lock lock(mtx); @@ -140,7 +185,6 @@ namespace bayesnet { auto& pair = *std::next(nodes.begin(), nextNodeIndex); ++nextNodeIndex; lock.unlock(); - pair.second->computeCPT(dataset, laplaceSmoothing); lock.lock(); nodes[pair.first] = std::move(pair.second); @@ -150,7 +194,6 @@ namespace bayesnet { --activeThreads; cv.notify_one(); }); - ++activeThreads; } for (auto& thread : threads) { @@ -218,7 +261,6 @@ namespace bayesnet { evidence[features[i]] = sample[i]; } return exactInference(evidence); - } double Network::computeFactor(map& completeEvidence) { @@ -292,5 +334,4 @@ namespace bayesnet { } return edges; } - } diff --git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index d16e53c..8ef8be7 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -24,6 +24,7 @@ namespace bayesnet { double entropy(torch::Tensor&); double conditionalEntropy(torch::Tensor&, torch::Tensor&); double mutualInformation(torch::Tensor&, torch::Tensor&); + void completeFit(); public: Network(); Network(float, int); diff --git a/src/BayesNet/Node.cc b/src/BayesNet/Node.cc index 0a5a580..d33fecf 100644 --- a/src/BayesNet/Node.cc +++ b/src/BayesNet/Node.cc @@ -6,6 +6,14 @@ namespace bayesnet { : name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector()), children(vector()) { } + void Node::clear() + { + parents.clear(); + children.clear(); + cpTable = torch::Tensor(); + dimensions.clear(); + numStates = 0; + } string Node::getName() const { return name; diff --git a/src/BayesNet/Node.h b/src/BayesNet/Node.h index 45c9c02..5c5932a 100644 --- a/src/BayesNet/Node.h +++ b/src/BayesNet/Node.h @@ -17,6 +17,7 @@ namespace bayesnet { public: vector> combinations(const vector&); Node(const std::string&, int); + void clear(); void addParent(Node*); void addChild(Node*); void removeParent(Node*); diff --git a/src/BayesNet/TAN.cc b/src/BayesNet/TAN.cc index 9c8dfff..0e87f44 100644 --- a/src/BayesNet/TAN.cc +++ b/src/BayesNet/TAN.cc @@ -3,7 +3,7 @@ namespace bayesnet { using namespace torch; - TAN::TAN() : Classifier(Network()) {} + TAN::TAN() : Classifier(Network(0.1)) {} void TAN::train() { diff --git a/src/BayesNet/bayesnetUtils.cc b/src/BayesNet/bayesnetUtils.cc index 66502aa..356b4ea 100644 --- a/src/BayesNet/bayesnetUtils.cc +++ b/src/BayesNet/bayesnetUtils.cc @@ -18,7 +18,7 @@ namespace bayesnet { // Iterate over cols for (int i = 0; i < tensor.size(1); ++i) { auto col_tensor = tensor.index({ "...", i }); - auto col = vector(col_tensor.data_ptr(), col_tensor.data_ptr() + tensor.size(0)); + auto col = vector(col_tensor.data_ptr(), col_tensor.data_ptr() + tensor.size(0)); result.push_back(col); } return result; diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 6b4632e..7a6dada 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -5,4 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) add_executable(main Experiment.cc Folding.cc platformUtils.cc) add_executable(testx testx.cpp Folding.cc) -target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") +target_link_libraries(testx ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index c0ede52..6bca89b 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -18,8 +18,13 @@ using namespace std; -Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector features, string className, map> states) +Result cross_validation(Fold* fold, string model_name, Tensor& X, Tensor& y, vector features, string className, map> states) { + auto classifiers = map({ + { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, + { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } + } + ); auto result = Result(); auto k = fold->getNumberOfFolds(); auto accuracy = torch::zeros({ k }, kFloat64); @@ -27,6 +32,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, auto test_time = torch::zeros({ k }, kFloat64); Timer train_timer, test_timer; for (int i = 0; i < k; i++) { + bayesnet::BaseClassifier* model = classifiers[model_name]; train_timer.start(); auto [train, test] = fold->getFold(i); auto train_t = torch::tensor(train); @@ -43,8 +49,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, cout << "y_test: " << y_test.sizes() << endl; train_time[i] = train_timer.getDuration(); test_timer.start(); - //auto acc = model->score(X_test, y_test); - auto acc = 7; + auto acc = model->score(X_test, y_test); test_time[i] = test_timer.getDuration(); accuracy[i] = acc; } @@ -140,18 +145,16 @@ int main(int argc, char** argv) fold = new StratifiedKFold(n_folds, y, -1); else fold = new KFold(n_folds, y.numel(), -1); - auto classifiers = map({ - { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, - { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } - } - ); + auto experiment = Experiment(); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp"); experiment.setStratified(stratified).setNFolds(5).addRandomSeed(271).setScoreName("accuracy"); - bayesnet::BaseClassifier* model = classifiers[model_name]; - auto result = cross_validation(fold, model, X, y, features, className, states); + auto result = cross_validation(fold, model_name, X, y, features, className, states); result.setDataset(file_name); experiment.addResult(result); experiment.save(path); + for (auto& item : states) { + cout << item.first << ": " << item.second.size() << endl; + } return 0; } diff --git a/src/Platform/Folding.cc b/src/Platform/Folding.cc index 2f1533b..b688353 100644 --- a/src/Platform/Folding.cc +++ b/src/Platform/Folding.cc @@ -1,13 +1,16 @@ #include "Folding.h" #include #include -#include +Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed) +{ + random_device rd; + random_seed = default_random_engine(seed == -1 ? rd() : seed); + srand(seed == -1 ? time(0) : seed); +} KFold::KFold(int k, int n, int seed) : Fold(k, n, seed) { indices = vector(n); iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1 - random_device rd; - default_random_engine random_seed(seed == -1 ? rd() : seed); shuffle(indices.begin(), indices.end(), random_seed); } pair, vector> KFold::getFold(int nFold) @@ -54,8 +57,6 @@ void StratifiedKFold::build() class_indices[y[i]].push_back(i); } // Shuffle class indices - random_device rd; - default_random_engine random_seed(seed == -1 ? rd() : seed); for (auto& [cls, indices] : class_indices) { shuffle(indices.begin(), indices.end(), random_seed); } @@ -71,7 +72,7 @@ void StratifiedKFold::build() class_indices[label].erase(class_indices[label].begin(), it); } while (remainder_samples_to_take > 0) { - int fold = (arc4random() % static_cast(k)); + int fold = (rand() % static_cast(k)); if (stratified_indices[fold].size() == fold_size) { continue; } diff --git a/src/Platform/Folding.h b/src/Platform/Folding.h index 4e37ee1..d7736d0 100644 --- a/src/Platform/Folding.h +++ b/src/Platform/Folding.h @@ -2,6 +2,7 @@ #define FOLDING_H #include #include +#include using namespace std; class Fold { @@ -9,8 +10,9 @@ protected: int k; int n; int seed; + default_random_engine random_seed; public: - Fold(int k, int n, int seed = -1) : k(k), n(n), seed(seed) {} + Fold(int k, int n, int seed = -1); virtual pair, vector> getFold(int nFold) = 0; virtual ~Fold() = default; int getNumberOfFolds() { return k; } diff --git a/src/Platform/platformUtils.cc b/src/Platform/platformUtils.cc index 4c383ec..ea8fad3 100644 --- a/src/Platform/platformUtils.cc +++ b/src/Platform/platformUtils.cc @@ -55,16 +55,16 @@ tuple, string, map>> loadData auto states = map>(); if (discretize_dataset) { auto Xr = discretizeDataset(X, y); - Xd = torch::zeros({ static_cast(Xr[0].size()), static_cast(Xr.size()) }, torch::kInt64); + Xd = torch::zeros({ static_cast(Xr[0].size()), static_cast(Xr.size()) }, torch::kInt32); for (int i = 0; i < features.size(); ++i) { states[features[i]] = vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); iota(begin(states[features[i]]), end(states[features[i]]), 0); - Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64)); + Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32)); } states[className] = vector(*max_element(y.begin(), y.end()) + 1); iota(begin(states[className]), end(states[className]), 0); } else { - Xd = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, torch::kFloat32); + Xd = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, torch::kFloat32); for (int i = 0; i < features.size(); ++i) { Xd.index_put_({ "...", i }, torch::tensor(X[i])); } diff --git a/src/Platform/testx.cpp b/src/Platform/testx.cpp index 0ad3017..09b083a 100644 --- a/src/Platform/testx.cpp +++ b/src/Platform/testx.cpp @@ -62,14 +62,40 @@ int main() cout << endl; cout << "Test Statistics: " << counts(y, test); cout << "==============================================================================" << endl; + torch::Tensor a = torch::zeros({ 5, 3 }); + torch::Tensor b = torch::zeros({ 5 }) + 1; + torch::Tensor c = torch::cat({ a, b.view({5, 1}) }, 1); + cout << "a:" << a.sizes() << endl; + cout << a << endl; + cout << "b:" << b.sizes() << endl; + cout << b << endl; + cout << "c:" << c.sizes() << endl; + cout << c << endl; + torch::Tensor d = torch::zeros({ 5, 3 }); + torch::Tensor e = torch::tensor({ 1,2,3,4,5 }) + 1; + torch::Tensor f = torch::cat({ d, e.view({5, 1}) }, 1); + cout << "d:" << d.sizes() << endl; + cout << d << endl; + cout << "e:" << e.sizes() << endl; + cout << e << endl; + cout << "f:" << f.sizes() << endl; + cout << f << endl; + auto indices = torch::tensor({ 0, 2, 4 }); + auto k = f.index({ indices, "..." }); + cout << "k:" << k.sizes() << endl; + cout << k << endl; + auto w = torch::index_select(f, 0, indices); + cout << "w:" << w.sizes() << endl; + cout << w << endl; + // cout << "Vector poly" << endl; - // auto some = vector(); - // auto cx = C(5, 4); - // auto bx = B(7, 6); - // some.push_back(cx); - // some.push_back(bx); - // for (auto& obj : some) { - // cout << "Obj :" << obj.getA() << endl; - // } + // auto some = vector(); + // auto cx = C(5, 4); + // auto bx = B(7, 6); + // some.push_back(cx); + // some.push_back(bx); + // for (auto& obj : some) { + // cout << "Obj :" << obj.getA() << endl; + // } } }