diff --git a/sample/sample.cc b/sample/sample.cc index 039a1e9..f515405 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -202,24 +202,18 @@ int main(int argc, char** argv) auto [train, test] = fold->getFold(i); cout << "Fold: " << i + 1 << endl; if (tensors) { - cout << "Xt shape: " << Xt.sizes() << endl; - cout << "yt shape: " << yt.sizes() << endl; auto ttrain = torch::tensor(train, torch::kInt64); auto ttest = torch::tensor(test, torch::kInt64); torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); torch::Tensor ytraint = yt.index({ ttrain }); torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); torch::Tensor ytestt = yt.index({ ttest }); - cout << "Train: " << Xtraint.size(0) << " x " << Xtraint.size(1) << " " << ytraint.size(0) << endl; - cout << "Test : " << Xtestt.size(0) << " x " << Xtestt.size(1) << " " << ytestt.size(0) << endl; clf->fit(Xtraint, ytraint, features, className, states); score_train = clf->score(Xtraint, ytraint); score_test = clf->score(Xtestt, ytestt); } else { auto [Xtrain, ytrain] = extract_indices(train, Xd, y); auto [Xtest, ytest] = extract_indices(test, Xd, y); - cout << "Train: " << Xtrain.size() << " x " << Xtrain[0].size() << " " << ytrain.size() << endl; - cout << "Test : " << Xtest.size() << " x " << Xtest[0].size() << " " << ytest.size() << endl; clf->fit(Xtrain, ytrain, features, className, states); score_train = clf->score(Xtrain, ytrain); score_test = clf->score(Xtest, ytest); diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index f6297a2..c23e29e 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -7,11 +7,6 @@ namespace bayesnet { Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier& Classifier::build(vector& features, string className, map>& states) { - cout << "Building classifier..." << endl; - cout << "X sizes = " << X.sizes() << endl; - cout << "y sizes = " << y.sizes() << endl; - cout << "Xv size = " << Xv.size() << endl; - cout << "yv size = " << yv.size() << endl; dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); this->features = features; this->className = className; @@ -21,8 +16,10 @@ namespace bayesnet { metrics = Metrics(dataset, features, className, n_classes); train(); if (Xv == vector>()) { + // fit with tensors model.fit(X, y, features, className); } else { + // fit with vectors model.fit(Xv, yv, features, className); } fitted = true; @@ -33,10 +30,6 @@ namespace bayesnet { this->X = torch::transpose(X, 0, 1); this->y = y; Xv = vector>(); - for (int i = 0; i < X.size(1); ++i) { - auto temp = X.index({ "...", i }); - Xv.push_back(vector(temp.data_ptr(), temp.data_ptr() + temp.numel())); - } yv = vector(y.data_ptr(), y.data_ptr() + y.size(0)); return build(features, className, states); } @@ -109,7 +102,8 @@ namespace bayesnet { if (!fitted) { throw logic_error("Classifier has not been fitted"); } - Tensor y_pred = predict(X); + auto Xt = torch::transpose(X, 0, 1); + Tensor y_pred = predict(Xt); return (y_pred == y).sum().item() / y.size(0); } float Classifier::score(vector>& X, vector& y) diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index 7bf64d2..eb3ffeb 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -99,6 +99,7 @@ namespace bayesnet { features = featureNames; this->className = className; dataset.clear(); + // Specific part classNumStates = torch::max(y).item() + 1; samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1); for (int i = 0; i < featureNames.size(); ++i) { @@ -110,36 +111,6 @@ namespace bayesnet { dataset[featureNames[i]] = k; } dataset[className] = vector(y.data_ptr(), y.data_ptr() + y.size(0)); - // // - // // Check if data is ok - // cout << "******************************************************************" << endl; - // cout << "Check samples, sizes: " << samples.sizes() << endl; - // for (auto i = 0; i < features.size(); ++i) { - // cout << featureNames[i] << ": " << nodes[featureNames[i]]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", i })).item() + 1 << " dataset" << *max_element(dataset[featureNames[i]].begin(), dataset[featureNames[i]].end()) + 1 << endl; - // } - // cout << className << ": " << nodes[className]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", -1 })) + 1 << endl; - // cout << "******************************************************************" << endl; - // // - // // - /* - - - */ - for (int i = 0; i < features.size(); ++i) { - cout << "Checking " << features[i] << endl; - auto column = torch::flatten(X.index({ "...", i })); - auto k = vector(); - for (auto i = 0; i < X.size(0); ++i) { - k.push_back(column[i].item()); - } - if (k != dataset[features[i]]) { - throw invalid_argument("Dataset and samples do not match"); - } - } - /* - - - */ completeFit(); } void Network::fit(const vector>& input_data, const vector& labels, const vector& featureNames, const string& className) @@ -147,6 +118,8 @@ namespace bayesnet { features = featureNames; this->className = className; dataset.clear(); + // Specific part + classNumStates = *max_element(labels.begin(), labels.end()) + 1; // Build dataset & tensor of samples samples = torch::zeros({ static_cast(input_data[0].size()), static_cast(input_data.size() + 1) }, torch::kInt32); for (int i = 0; i < featureNames.size(); ++i) { @@ -155,7 +128,6 @@ namespace bayesnet { } dataset[className] = labels; samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32)); - classNumStates = *max_element(labels.begin(), labels.end()) + 1; completeFit(); } void Network::completeFit()