diff --git a/.vscode/launch.json b/.vscode/launch.json index 415f773..e881ccf 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -19,7 +19,7 @@ "name": "experiment", "program": "${workspaceFolder}/build/src/Platform/main", "args": [ - "-f", + "-d", "iris", "-m", "TAN", diff --git a/src/BayesNet/bayesnetUtils.cc b/src/BayesNet/bayesnetUtils.cc index 2e1176d..66502aa 100644 --- a/src/BayesNet/bayesnetUtils.cc +++ b/src/BayesNet/bayesnetUtils.cc @@ -11,21 +11,16 @@ namespace bayesnet { sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); return indices; } - vector> tensorToVector(const Tensor& tensor) + vector> tensorToVector(Tensor& tensor) { // convert mxn tensor to nxm vector vector> result; - auto tensor_accessor = tensor.accessor(); - - // Iterate over columns and rows of the tensor - for (int j = 0; j < tensor.size(1); ++j) { - vector column; - for (int i = 0; i < tensor.size(0); ++i) { - column.push_back(tensor_accessor[i][j]); - } - result.push_back(column); + // Iterate over cols + for (int i = 0; i < tensor.size(1); ++i) { + auto col_tensor = tensor.index({ "...", i }); + auto col = vector(col_tensor.data_ptr(), col_tensor.data_ptr() + tensor.size(0)); + result.push_back(col); } - return result; } } \ No newline at end of file diff --git a/src/BayesNet/bayesnetUtils.h b/src/BayesNet/bayesnetUtils.h index bb03ca7..adfa8d7 100644 --- a/src/BayesNet/bayesnetUtils.h +++ b/src/BayesNet/bayesnetUtils.h @@ -6,6 +6,6 @@ namespace bayesnet { using namespace std; using namespace torch; vector argsort(vector& nums); - vector> tensorToVector(const Tensor& tensor); + vector> tensorToVector(Tensor& tensor); } #endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index 27a2ef7..c0ede52 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -31,14 +31,20 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, auto [train, test] = fold->getFold(i); auto train_t = torch::tensor(train); auto test_t = torch::tensor(test); - auto X_train = X.index({ train_t }); + auto X_train = X.index({ train_t, "..." }); auto y_train = y.index({ train_t }); - auto X_test = X.index({ test_t }); + auto X_test = X.index({ test_t, "..." }); auto y_test = y.index({ test_t }); model->fit(X_train, y_train, features, className, states); + cout << "Training Fold " << i + 1 << endl; + cout << "X_train: " << X_train.sizes() << endl; + cout << "y_train: " << y_train.sizes() << endl; + cout << "X_test: " << X_test.sizes() << endl; + cout << "y_test: " << y_test.sizes() << endl; train_time[i] = train_timer.getDuration(); test_timer.start(); - auto acc = model->score(X_test, y_test); + //auto acc = model->score(X_test, y_test); + auto acc = 7; test_time[i] = test_timer.getDuration(); accuracy[i] = acc; } @@ -64,7 +70,7 @@ int main(int argc, char** argv) valid_datasets.push_back(dataset.first); } argparse::ArgumentParser program("BayesNetSample"); - program.add_argument("-f", "--file") + program.add_argument("-d", "--dataset") .help("Dataset file name") .action([valid_datasets](const std::string& value) { if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { @@ -74,7 +80,7 @@ int main(int argc, char** argv) } ); program.add_argument("-p", "--path") - .help(" folder where the data files are located, default") + .help("folder where the data files are located, default") .default_value(string{ PATH } ); program.add_argument("-m", "--model") @@ -87,15 +93,33 @@ int main(int argc, char** argv) throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); } ); - program.add_argument("--discretize").default_value(false).implicit_value(true); - bool class_last, discretize_dataset; + program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); + program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); + program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { + try { + auto k = stoi(value); + if (k < 2) { + throw runtime_error("Number of folds must be greater than 1"); + } + return k; + } + catch (const runtime_error& err) { + throw runtime_error(err.what()); + } + catch (...) { + throw runtime_error("Number of folds must be an integer"); + }}); + bool class_last, discretize_dataset, stratified; + int n_folds; string model_name, file_name, path, complete_file_name; try { program.parse_args(argc, argv); - file_name = program.get("file"); + file_name = program.get("dataset"); path = program.get("path"); model_name = program.get("model"); discretize_dataset = program.get("discretize"); + stratified = program.get("stratified"); + n_folds = program.get("folds"); complete_file_name = path + file_name + ".arff"; class_last = datasets[file_name]; if (!file_exists(complete_file_name)) { @@ -111,7 +135,11 @@ int main(int argc, char** argv) * Begin Processing */ auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset); - auto fold = StratifiedKFold(5, y, -1); + Fold* fold; + if (stratified) + fold = new StratifiedKFold(n_folds, y, -1); + else + fold = new KFold(n_folds, y.numel(), -1); auto classifiers = map({ { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } @@ -119,9 +147,9 @@ int main(int argc, char** argv) ); auto experiment = Experiment(); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp"); - experiment.setStratified(true).setNFolds(5).addRandomSeed(271).setScoreName("accuracy"); + experiment.setStratified(stratified).setNFolds(5).addRandomSeed(271).setScoreName("accuracy"); bayesnet::BaseClassifier* model = classifiers[model_name]; - auto result = cross_validation(&fold, model, X, y, features, className, states); + auto result = cross_validation(fold, model, X, y, features, className, states); result.setDataset(file_name); experiment.addResult(result); experiment.save(path); diff --git a/src/Platform/m b/src/Platform/m deleted file mode 100755 index 0323306..0000000 Binary files a/src/Platform/m and /dev/null differ