From eb1cec58a3a06a52d6757990d00e84f6be16fa6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Thu, 3 Aug 2023 20:22:33 +0200 Subject: [PATCH] Complete nxm --- .vscode/launch.json | 5 +- Makefile | 6 +- sample/sample.cc | 21 ++++-- src/BayesNet/BaseClassifier.h | 3 + src/BayesNet/BayesMetrics.cc | 17 +++-- src/BayesNet/BayesMetrics.h | 4 +- src/BayesNet/CMakeLists.txt | 2 +- src/BayesNet/Classifier.cc | 39 +++++----- src/BayesNet/Classifier.h | 7 +- src/BayesNet/Ensemble.h | 3 + src/BayesNet/KDB.cc | 5 +- src/BayesNet/Network.cc | 135 ++++++++++++++++++++++------------ src/BayesNet/Network.h | 16 ++-- src/BayesNet/TAN.cc | 8 +- src/BayesNet/TANNew.cc | 33 ++++++--- 15 files changed, 193 insertions(+), 111 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 951d4c3..567f55a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,10 +10,11 @@ "-d", "iris", "-m", - "TANNew", + "KDB", + "-s", + "271", "-p", "/Users/rmontanana/Code/discretizbench/datasets/", - "--tensors" ], //"cwd": "${workspaceFolder}/build/sample/", }, diff --git a/Makefile b/Makefile index e7aab02..f841342 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ setup: ## Install dependencies for tests and coverage dependency: ## Create a dependency graph diagram of the project (build/dependency.png) cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png -debug: ## Build the project +debug: ## Build a debug version of the project @echo ">>> Building Debug BayesNet ..."; @if [ -d ./build ]; then rm -rf ./build; fi @mkdir build; @@ -22,12 +22,12 @@ debug: ## Build the project cmake --build build -j 32; @echo ">>> Done"; -release: +release: ## Build a Release version of the project @echo ">>> Building Release BayesNet ..."; @if [ -d ./build ]; then rm -rf ./build; fi @mkdir build; cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \ - cmake --build build -t main -j 32; + cmake --build build -t main -t BayesNetSample -j 32; @echo ">>> Done"; test: ## Run tests diff --git a/sample/sample.cc b/sample/sample.cc index 869b910..ef81546 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -95,6 +95,7 @@ int main(int argc, char** argv) } ); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); + program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { @@ -112,7 +113,7 @@ int main(int argc, char** argv) throw runtime_error("Number of folds must be an integer"); }}); program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); - bool class_last, stratified, tensors; + bool class_last, stratified, tensors, dump_cpt; string model_name, file_name, path, complete_file_name; int nFolds, seed; try { @@ -125,6 +126,7 @@ int main(int argc, char** argv) tensors = program.get("tensors"); nFolds = program.get("folds"); seed = program.get("seed"); + dump_cpt = program.get("dumpcpt"); class_last = datasets[file_name]; if (!file_exists(complete_file_name)) { throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); @@ -158,21 +160,25 @@ int main(int argc, char** argv) states[feature] = vector(maxes[feature]); } states[className] = vector(maxes[className]); - auto clf = platform::Models::instance()->create(model_name); clf->fit(Xd, y, features, className, states); - auto score = clf->score(Xd, y); + if (dump_cpt) { + cout << "--- CPT Tables ---" << endl; + clf->dump_cpt(); + } auto lines = clf->show(); - auto graph = clf->graph(); for (auto line : lines) { cout << line << endl; } cout << "--- Topological Order ---" << endl; - for (auto name : clf->topological_order()) { + auto order = clf->topological_order(); + for (auto name : order) { cout << name << ", "; } cout << "end." << endl; + auto score = clf->score(Xd, y); cout << "Score: " << score << endl; + auto graph = clf->graph(); auto dot_file = model_name + "_" + file_name; ofstream file(dot_file + ".dot"); file << graph; @@ -211,9 +217,14 @@ int main(int argc, char** argv) auto [Xtrain, ytrain] = extract_indices(train, Xd, y); auto [Xtest, ytest] = extract_indices(test, Xd, y); clf->fit(Xtrain, ytrain, features, className, states); + score_train = clf->score(Xtrain, ytrain); score_test = clf->score(Xtest, ytest); } + if (dump_cpt) { + cout << "--- CPT Tables ---" << endl; + clf->dump_cpt(); + } total_score_train += score_train; total_score += score_test; cout << "Score Train: " << score_train << endl; diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index 2aae0a4..1bff6f6 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -6,7 +6,9 @@ namespace bayesnet { using namespace std; class BaseClassifier { public: + // X is nxm vector, y is nx1 vector virtual BaseClassifier& fit(vector>& X, vector& y, vector& features, string className, map>& states) = 0; + // X is nxm tensor, y is nx1 tensor virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0; vector virtual predict(vector>& X) = 0; @@ -20,6 +22,7 @@ namespace bayesnet { virtual ~BaseClassifier() = default; const string inline getVersion() const { return "0.1.0"; }; vector virtual topological_order() = 0; + void virtual dump_cpt() = 0; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index be15f07..c80fb65 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -1,6 +1,7 @@ #include "BayesMetrics.h" #include "Mst.h" namespace bayesnet { + //samples is nxm tensor used to fit the model Metrics::Metrics(torch::Tensor& samples, vector& features, string& className, int classNumStates) : samples(samples) , features(features) @@ -8,6 +9,7 @@ namespace bayesnet { , classNumStates(classNumStates) { } + //samples is nxm vector used to fit the model Metrics::Metrics(const vector>& vsamples, const vector& labels, const vector& features, const string& className, const int classNumStates) : features(features) , className(className) @@ -15,9 +17,9 @@ namespace bayesnet { , samples(torch::zeros({ static_cast(vsamples[0].size()), static_cast(vsamples.size() + 1) }, torch::kInt32)) { for (int i = 0; i < vsamples.size(); ++i) { - samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32)); + samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32)); } - samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32)); + samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); } vector> Metrics::doCombinations(const vector& source) { @@ -39,17 +41,17 @@ namespace bayesnet { // Compute class prior auto margin = torch::zeros({ classNumStates }); for (int value = 0; value < classNumStates; ++value) { - auto mask = samples.index({ "...", -1 }) == value; - margin[value] = mask.sum().item() / samples.sizes()[0]; + auto mask = samples.index({ -1, "..." }) == value; + margin[value] = mask.sum().item() / samples.size(1); } for (auto [first, second] : combinations) { int index_first = find(features.begin(), features.end(), first) - features.begin(); int index_second = find(features.begin(), features.end(), second) - features.begin(); double accumulated = 0; for (int value = 0; value < classNumStates; ++value) { - auto mask = samples.index({ "...", -1 }) == value; - auto first_dataset = samples.index({ mask, index_first }); - auto second_dataset = samples.index({ mask, index_second }); + auto mask = samples.index({ -1, "..." }) == value; + auto first_dataset = samples.index({ index_first, mask }); + auto second_dataset = samples.index({ index_second, mask }); auto mi = mutualInformation(first_dataset, second_dataset); auto pb = margin[value].item(); accumulated += pb * mi; @@ -67,6 +69,7 @@ namespace bayesnet { } return matrix; } + // To use in Python vector Metrics::conditionalEdgeWeights() { auto matrix = conditionalEdge(); diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 427b1d4..183cfcc 100644 --- a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -8,7 +8,7 @@ namespace bayesnet { using namespace torch; class Metrics { private: - Tensor samples; + Tensor samples; // nxm tensor used to fit the model vector features; string className; int classNumStates = 0; @@ -19,7 +19,7 @@ namespace bayesnet { double entropy(Tensor&); double conditionalEntropy(Tensor&, Tensor&); double mutualInformation(Tensor&, Tensor&); - vector conditionalEdgeWeights(); + vector conditionalEdgeWeights(); // To use in Python Tensor conditionalEdge(); vector> doCombinations(const vector&); vector> maximumSpanningTree(vector features, Tensor& weights, int root); diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index ea530a9..6fa1c24 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -1,4 +1,4 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/Files) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANNew.cc Mst.cc) -target_link_libraries(BayesNet mdlp arff "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index 3099979..f68d1dc 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -7,15 +7,18 @@ namespace bayesnet { Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier& Classifier::build(vector& features, string className, map>& states) { - dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); + Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); + samples = torch::cat({ X, ytmp }, 0); this->features = features; this->className = className; this->states = states; + cout << "Classifier samples: " << samples.sizes() << endl; checkFitParameters(); auto n_classes = states[className].size(); - metrics = Metrics(dataset, features, className, n_classes); + metrics = Metrics(samples, features, className, n_classes); + model.initialize(); train(); - if (Xv == vector>()) { + if (Xv.empty()) { // fit with tensors model.fit(X, y, features, className); } else { @@ -25,21 +28,23 @@ namespace bayesnet { fitted = true; return *this; } + // X is nxm where n is the number of features and m the number of samples Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) { - this->X = torch::transpose(X, 0, 1); + this->X = X; this->y = y; Xv = vector>(); yv = vector(y.data_ptr(), y.data_ptr() + y.size(0)); return build(features, className, states); } - + // X is nxm where n is the number of features and m the number of samples Classifier& Classifier::fit(vector>& X, vector& y, vector& features, string className, map>& states) { - this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt32); + + this->X = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, kInt32); Xv = X; for (int i = 0; i < X.size(); ++i) { - this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32)); + this->X.index_put_({ i, "..." }, torch::tensor(X[i], kInt32)); } this->y = torch::tensor(y, kInt32); yv = y; @@ -48,8 +53,8 @@ namespace bayesnet { void Classifier::checkFitParameters() { auto sizes = X.sizes(); - m = sizes[0]; - n = sizes[1]; + m = sizes[1]; + n = sizes[0]; if (m != y.size(0)) { throw invalid_argument("X and y must have the same number of samples"); } @@ -70,9 +75,7 @@ namespace bayesnet { if (!fitted) { throw logic_error("Classifier has not been fitted"); } - auto Xt = torch::transpose(X, 0, 1); // Base classifiers expect samples as columns - auto y_proba = model.predict(Xt); - return y_proba.argmax(1); + return model.predict(X); } vector Classifier::predict(vector>& X) { @@ -101,12 +104,6 @@ namespace bayesnet { if (!fitted) { throw logic_error("Classifier has not been fitted"); } - // auto m_ = X[0].size(); - // auto n_ = X.size(); - // vector> Xd(n_, vector(m_, 0)); - // for (auto i = 0; i < n_; i++) { - // Xd[i] = vector(X[i].begin(), X[i].end()); - // } return model.score(X, y); } vector Classifier::show() @@ -116,7 +113,7 @@ namespace bayesnet { void Classifier::addNodes() { // Add all nodes to the network - for (auto feature : features) { + for (const auto& feature : features) { model.addNode(feature, states[feature].size()); } model.addNode(className, states[className].size()); @@ -138,4 +135,8 @@ namespace bayesnet { { return model.topological_sort(); } + void Classifier::dump_cpt() + { + model.dump_cpt(); + } } \ No newline at end of file diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index ddfa251..e10a523 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -15,11 +15,11 @@ namespace bayesnet { protected: Network model; int m, n; // m: number of samples, n: number of features - Tensor X; - vector> Xv; + Tensor X; // nxm tensor + vector> Xv; // nxm vector Tensor y; vector yv; - Tensor dataset; + Tensor samples; // (n+1)xm tensor Metrics metrics; vector features; string className; @@ -41,6 +41,7 @@ namespace bayesnet { float score(vector>& X, vector& y) override; vector show() override; vector topological_order() override; + void dump_cpt() override; }; } #endif diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index 45f4d5a..ef9d7f1 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -45,6 +45,9 @@ namespace bayesnet { { return vector(); } + void dump_cpt() override + { + } }; } #endif diff --git a/src/BayesNet/KDB.cc b/src/BayesNet/KDB.cc index cfdf750..a0ab434 100644 --- a/src/BayesNet/KDB.cc +++ b/src/BayesNet/KDB.cc @@ -27,9 +27,10 @@ namespace bayesnet { */ // 1. For each feature Xi, compute mutual information, I(X;C), // where C is the class. + addNodes(); vector mi; for (auto i = 0; i < features.size(); i++) { - Tensor firstFeature = X.index({ "...", i }); + Tensor firstFeature = X.index({ i, "..." }); mi.push_back(metrics.mutualInformation(firstFeature, y)); } // 2. Compute class conditional mutual information I(Xi;XjIC), f or each @@ -38,14 +39,12 @@ namespace bayesnet { vector S; // 4. Let the DAG network being constructed, BN, begin with a single // class node, C. - model.addNode(className, states[className].size()); // 5. Repeat until S includes all domain features // 5.1. Select feature Xmax which is not in S and has the largest value // I(Xmax;C). auto order = argsort(mi); for (auto idx : order) { // 5.2. Add a node to BN representing Xmax. - model.addNode(features[idx], states[features[idx]].size()); // 5.3. Add an arc from C to Xmax in BN. model.addEdge(className, features[idx]); // 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index 1c212be..7d18a7b 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -6,12 +6,23 @@ namespace bayesnet { Network::Network() : features(vector()), className(""), classNumStates(0), fitted(false) {} Network::Network(float maxT) : features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} - Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted) + Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other. + getmaxThreads()), fitted(other.fitted) { for (const auto& pair : other.nodes) { nodes[pair.first] = std::make_unique(*pair.second); } } + void Network::initialize() + { + features = vector(); + className = ""; + classNumStates = 0; + fitted = false; + nodes.clear(); + dataset.clear(); + samples = torch::Tensor(); + } float Network::getmaxThreads() { return maxThreads; @@ -22,6 +33,9 @@ namespace bayesnet { } void Network::addNode(const string& name, int numStates) { + if (name == "") { + throw invalid_argument("Node name cannot be empty"); + } if (find(features.begin(), features.end(), name) == features.end()) { features.push_back(name); } @@ -94,40 +108,59 @@ namespace bayesnet { { return nodes; } + void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector& featureNames, const string& className) + { + if (n_samples != n_samples_y) { + throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")"); + } + if (n_features != featureNames.size()) { + throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")"); + } + if (n_features != features.size() - 1) { + throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")"); + } + if (find(features.begin(), features.end(), className) == features.end()) { + throw invalid_argument("className not found in Network::features"); + } + for (auto& feature : featureNames) { + if (find(features.begin(), features.end(), feature) == features.end()) { + throw invalid_argument("Feature " + feature + " not found in Network::features"); + } + } + } + // X comes in nxm, where n is the number of features and m the number of samples void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector& featureNames, const string& className) { - features = featureNames; + checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className); this->className = className; dataset.clear(); // Specific part classNumStates = torch::max(y).item() + 1; - samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1); + Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); + samples = torch::cat({ X , ytmp }, 0); for (int i = 0; i < featureNames.size(); ++i) { - auto column = torch::flatten(X.index({ "...", i })); - auto k = vector(); - for (auto z = 0; z < X.size(0); ++z) { - k.push_back(column[z].item()); - } - dataset[featureNames[i]] = k; + auto row_feature = X.index({ i, "..." }); + dataset[featureNames[i]] = vector(row_feature.data_ptr(), row_feature.data_ptr() + row_feature.size(0));; } dataset[className] = vector(y.data_ptr(), y.data_ptr() + y.size(0)); completeFit(); } + // input_data comes in nxm, where n is the number of features and m the number of samples void Network::fit(const vector>& input_data, const vector& labels, const vector& featureNames, const string& className) { - features = featureNames; + checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className); this->className = className; dataset.clear(); // Specific part classNumStates = *max_element(labels.begin(), labels.end()) + 1; - // Build dataset & tensor of samples - samples = torch::zeros({ static_cast(input_data[0].size()), static_cast(input_data.size() + 1) }, torch::kInt32); + // Build dataset & tensor of samples (nxm) (n+1 because of the class) + samples = torch::zeros({ static_cast(input_data.size() + 1), static_cast(input_data[0].size()) }, torch::kInt32); for (int i = 0; i < featureNames.size(); ++i) { dataset[featureNames[i]] = input_data[i]; - samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt32)); + samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); } dataset[className] = labels; - samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32)); + samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); completeFit(); } void Network::completeFit() @@ -169,35 +202,36 @@ namespace bayesnet { } fitted = true; } - Tensor Network::predict_proba(const Tensor& samples) - { - if (!fitted) { - throw logic_error("You must call fit() before calling predict_proba()"); - } - Tensor result = torch::zeros({ samples.size(0), classNumStates }, torch::kFloat64); - auto Xt = torch::transpose(samples, 0, 1); - for (int i = 0; i < samples.size(0); ++i) { - auto sample = Xt.index({ "...", i }); - auto classProbabilities = predict_sample(sample); - result.index_put_({ i, "..." }, torch::tensor(classProbabilities, torch::kFloat64)); - } - return result; - } - Tensor Network::predict(const Tensor& samples) + torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) { if (!fitted) { throw logic_error("You must call fit() before calling predict()"); } - Tensor result = torch::zeros({ samples.size(0), classNumStates }, torch::kFloat64); - auto Xt = torch::transpose(samples, 0, 1); - for (int i = 0; i < samples.size(0); ++i) { - auto sample = Xt.index({ "...", i }); - auto classProbabilities = predict_sample(sample); - result.index_put_({ i, "..." }, torch::tensor(classProbabilities, torch::kFloat64)); + torch::Tensor result; + result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); + for (int i = 0; i < samples.size(1); ++i) { + auto sample = samples.index({ "...", i }); + result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); } - return result; + if (proba) + return result; + else + return result.argmax(1); + } + // Return mxn tensor of probabilities + Tensor Network::predict_proba(const Tensor& samples) + { + return predict_tensor(samples, true); } + // Return mxn tensor of probabilities + Tensor Network::predict(const Tensor& samples) + { + return predict_tensor(samples, false); + } + + // Return mx1 vector of predictions + // tsamples is nxm vector of samples vector Network::predict(const vector>& tsamples) { if (!fitted) { @@ -218,6 +252,7 @@ namespace bayesnet { } return predictions; } + // Return mxn vector of probabilities vector> Network::predict_proba(const vector>& tsamples) { if (!fitted) { @@ -245,12 +280,13 @@ namespace bayesnet { } return (double)correct / y_pred.size(); } + // Return 1xn vector of probabilities vector Network::predict_sample(const vector& sample) { // Ensure the sample size is equal to the number of features - if (sample.size() != features.size()) { + if (sample.size() != features.size() - 1) { throw invalid_argument("Sample size (" + to_string(sample.size()) + - ") does not match the number of features (" + to_string(features.size()) + ")"); + ") does not match the number of features (" + to_string(features.size() - 1) + ")"); } map evidence; for (int i = 0; i < sample.size(); ++i) { @@ -258,17 +294,21 @@ namespace bayesnet { } return exactInference(evidence); } + // Return 1xn vector of probabilities vector Network::predict_sample(const Tensor& sample) { // Ensure the sample size is equal to the number of features - if (sample.size(0) != features.size()) { + if (sample.size(0) != features.size() - 1) { throw invalid_argument("Sample size (" + to_string(sample.size(0)) + - ") does not match the number of features (" + to_string(features.size()) + ")"); + ") does not match the number of features (" + to_string(features.size() - 1) + ")"); } map evidence; for (int i = 0; i < sample.size(0); ++i) { evidence[features[i]] = sample[i].item(); + cout << "Evidence: " << features[i] << " = " << sample[i].item() << endl; } + cout << "BEfore exact inference" << endl; + return exactInference(evidence); } double Network::computeFactor(map& completeEvidence) @@ -345,25 +385,25 @@ namespace bayesnet { { /* Check if al the fathers of every node are before the node */ auto result = features; + result.erase(remove(result.begin(), result.end(), className), result.end()); bool ending{ false }; int idx = 0; while (!ending) { ending = true; for (auto feature : features) { - if (feature == className) { - continue; - } auto fathers = nodes[feature]->getParents(); for (const auto& father : fathers) { auto fatherName = father->getName(); if (fatherName == className) { continue; } + // Check if father is placed before the actual feature auto it = find(result.begin(), result.end(), fatherName); if (it != result.end()) { auto it2 = find(result.begin(), result.end(), feature); if (it2 != result.end()) { if (distance(it, it2) < 0) { + // if it is not, insert it before the feature result.erase(remove(result.begin(), result.end(), fatherName), result.end()); result.insert(it2, fatherName); ending = false; @@ -377,9 +417,12 @@ namespace bayesnet { } } } - - - return result; } + void Network::dump_cpt() + { + for (auto& node : nodes) { + cout << "* " << node.first << ": " << node.second->getCPT() << endl; + } + } } diff --git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index 39ff5a6..49ebcfb 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -12,10 +12,10 @@ namespace bayesnet { bool fitted; float maxThreads = 0.95; int classNumStates; - vector features; + vector features; // Including class string className; int laplaceSmoothing = 1; - torch::Tensor samples; + torch::Tensor samples; // nxm tensor used to fit the model bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); vector predict_sample(const vector&); vector predict_sample(const torch::Tensor&); @@ -26,6 +26,7 @@ namespace bayesnet { double conditionalEntropy(torch::Tensor&, torch::Tensor&); double mutualInformation(torch::Tensor&, torch::Tensor&); void completeFit(); + void checkFitData(int n_features, int n_samples, int n_samples_y, const vector& featureNames, const string& className); public: Network(); explicit Network(float, int); @@ -43,16 +44,19 @@ namespace bayesnet { string getClassName(); void fit(const vector>&, const vector&, const vector&, const string&); void fit(torch::Tensor&, torch::Tensor&, const vector&, const string&); - vector predict(const vector>&); - torch::Tensor predict(const torch::Tensor&); + vector predict(const vector>&); // Return mx1 vector of predictions + torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions //Computes the conditional edge weight of variable index u and v conditioned on class_node torch::Tensor conditionalEdgeWeight(); - vector> predict_proba(const vector>&); - torch::Tensor predict_proba(const torch::Tensor&); + torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); + vector> predict_proba(const vector>&); // Return mxn vector of probabilities + torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities double score(const vector>&, const vector&); vector topological_sort(); vector show(); vector graph(const string& title); // Returns a vector of strings representing the graph in graphviz format + void initialize(); + void dump_cpt(); inline string version() { return "0.1.0"; } }; } diff --git a/src/BayesNet/TAN.cc b/src/BayesNet/TAN.cc index df6561a..1757a2a 100644 --- a/src/BayesNet/TAN.cc +++ b/src/BayesNet/TAN.cc @@ -12,13 +12,17 @@ namespace bayesnet { // 1. Compute mutual information between each feature and the class and set the root node // as the highest mutual information with the class auto mi = vector >(); - Tensor class_dataset = dataset.index({ "...", -1 }); + Tensor class_dataset = samples.index({ -1, "..." }); for (int i = 0; i < static_cast(features.size()); ++i) { - Tensor feature_dataset = dataset.index({ "...", i }); + Tensor feature_dataset = samples.index({ i, "..." }); auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset); mi.push_back({ i, mi_value }); } sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); + cout << "MI: " << endl; + for (int i = 0; i < mi.size(); ++i) { + cout << mi[i].first << " " << mi[i].second << endl; + } auto root = mi[mi.size() - 1].first; // 2. Compute mutual information between each feature and the class auto weights = metrics.conditionalEdge(); diff --git a/src/BayesNet/TANNew.cc b/src/BayesNet/TANNew.cc index 1ef85e3..57be2e6 100644 --- a/src/BayesNet/TANNew.cc +++ b/src/BayesNet/TANNew.cc @@ -32,19 +32,26 @@ namespace bayesnet { iota(yStates.begin(), yStates.end(), 0); this->states[className] = yStates; // Now we have standard TAN and now we implement the proposal + // 1st we need to fit the model to build the TAN structure + cout << "TANNew: Fitting model" << endl; + TAN::fit(Xv, yv, features, className, this->states); + cout << "TANNew: Model fitted" << endl; // order of local discretization is important. no good 0, 1, 2... + auto edges = model.getEdges(); auto order = model.topological_sort(); - auto nodes = model.getNodes(); + auto& nodes = model.getNodes(); + vector indicesToReDiscretize; bool upgrade = false; // Flag to check if we need to upgrade the model for (auto feature : order) { auto nodeParents = nodes[feature]->getParents(); int index = find(features.begin(), features.end(), feature) - features.begin(); vector parents; + transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) {return p->getName(); }); if (parents.size() == 1) continue; // Only has class as parent upgrade = true; - transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) {return p->getName(); }); // Remove class as parent as it will be added later parents.erase(remove(parents.begin(), parents.end(), className), parents.end()); + // Get the indices of the parents vector indices; transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(features.begin(), features.end(), p) - features.begin(); }); // Now we fit the discretizer of the feature conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y) @@ -58,22 +65,23 @@ namespace bayesnet { auto arff = ArffFiles(); auto yxv = arff.factorize(yJoinParents); discretizers[feature]->fit(Xvf[index], yxv); + indicesToReDiscretize.push_back(index); } if (upgrade) { - // Discretize again X with the new fitted discretizers - Xv = vector>(); - for (int i = 0; i < features.size(); ++i) { - auto Xt_ptr = X.index({ i }).data_ptr(); + // Discretize again X (only the affected indices) with the new fitted discretizers + for (auto index : indicesToReDiscretize) { + auto Xt_ptr = X.index({ index }).data_ptr(); auto Xt = vector(Xt_ptr, Xt_ptr + X.size(1)); - Xv.push_back(discretizers[features[i]]->transform(Xt)); - auto xStates = vector(discretizers[features[i]]->getCutPoints().size() + 1); + Xv[index] = discretizers[features[index]]->transform(Xt); + auto xStates = vector(discretizers[features[index]]->getCutPoints().size() + 1); iota(xStates.begin(), xStates.end(), 0); - this->states[features[i]] = xStates; + this->states[features[index]] = xStates; } + // Now we fit the model again with the new values + cout << "TANNew: Upgrading model" << endl; + model.fit(Xv, yv, features, className); + cout << "TANNew: Model upgraded" << endl; } - - - TAN::fit(Xv, yv, features, className, this->states); return *this; } void TANNew::train() @@ -88,6 +96,7 @@ namespace bayesnet { auto Xd = discretizers[features[i]]->transform(Xt); Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32)); } + cout << "TANNew Xtd: " << Xtd.sizes() << endl; return TAN::predict(Xtd); } vector TANNew::graph(const string& name)