diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index 2921f11..9d0a404 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -11,6 +11,9 @@ namespace bayesnet { vector virtual predict(vector>& X) = 0; float virtual score(vector>& X, vector& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; + int virtual getNumberOfNodes() = 0; + int virtual getNumberOfEdges() = 0; + int virtual getNumberOfStates() = 0; vector virtual show() = 0; vector virtual graph(string title = "") = 0; virtual ~BaseClassifier() = default; diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index c23e29e..d77c2c8 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -141,4 +141,8 @@ namespace bayesnet { { return fitted ? model.getEdges().size() : 0; } + int Classifier::getNumberOfStates() + { + return fitted ? model.getStates() : 0; + } } \ No newline at end of file diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index 578b9ed..ad56336 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -32,8 +32,9 @@ namespace bayesnet { Classifier& fit(vector>& X, vector& y, vector& features, string className, map>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) override; void addNodes(); - int getNumberOfNodes(); - int getNumberOfEdges(); + int getNumberOfNodes() override; + int getNumberOfEdges() override; + int getNumberOfStates() override; Tensor predict(Tensor& X); vector predict(vector>& X) override; float score(Tensor& X, Tensor& y) override; diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index b8c6418..8aa2518 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -130,4 +130,28 @@ namespace bayesnet { } return result; } + int Ensemble::getNumberOfNodes() + { + int nodes = 0; + for (auto i = 0; i < n_models; ++i) { + nodes += models[i]->getNumberOfNodes(); + } + return nodes; + } + int Ensemble::getNumberOfEdges() + { + int edges = 0; + for (auto i = 0; i < n_models; ++i) { + edges += models[i]->getNumberOfEdges(); + } + return edges; + } + int Ensemble::getNumberOfStates() + { + int states = 0; + for (auto i = 0; i < n_models; ++i) { + states += models[i]->getNumberOfStates(); + } + return states; + } } \ No newline at end of file diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index e25ee75..d45575d 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -36,6 +36,9 @@ namespace bayesnet { vector predict(vector>& X) override; float score(Tensor& X, Tensor& y) override; float score(vector>& X, vector& y) override; + int getNumberOfNodes() override; + int getNumberOfEdges() override; + int getNumberOfStates() override; vector show() override; vector graph(string title) override; }; diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index aa1159d..583f460 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -2,36 +2,48 @@ namespace platform { using json = nlohmann::json; - string get_date_time() + string get_date() { time_t rawtime; tm* timeinfo; time(&rawtime); timeinfo = std::localtime(&rawtime); - std::ostringstream oss; - oss << std::put_time(timeinfo, "%Y-%m-%d_%H:%M:%S"); + oss << std::put_time(timeinfo, "%Y-%m-%d"); + return oss.str(); + } + string get_time() + { + time_t rawtime; + tm* timeinfo; + time(&rawtime); + timeinfo = std::localtime(&rawtime); + std::ostringstream oss; + oss << std::put_time(timeinfo, "%H:%M:%S"); return oss.str(); } string Experiment::get_file_name() { - string date_time = get_date_time(); - string result = "results_" + score_name + "_" + model + "_" + platform + "_" + date_time + "_" + (stratified ? "1" : "0") + ".json"; + string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; return result; } + json Experiment::build_json() { json result; result["title"] = title; + result["date"] = get_date(); + result["time"] = get_time(); result["model"] = model; + result["version"] = model_version; result["platform"] = platform; result["score_name"] = score_name; - result["model_version"] = model_version; + result["language"] = language; result["language_version"] = language_version; result["discretized"] = discretized; result["stratified"] = stratified; - result["nfolds"] = nfolds; - result["random_seeds"] = random_seeds; + result["folds"] = nfolds; + result["seeds"] = random_seeds; result["duration"] = duration; result["results"] = json::array(); for (auto& r : results) { @@ -43,12 +55,19 @@ namespace platform { j["classes"] = r.getClasses(); j["score_train"] = r.getScoreTrain(); j["score_test"] = r.getScoreTest(); + j["score"] = r.getScoreTest(); + j["score_std"] = r.getScoreTestStd(); j["score_train_std"] = r.getScoreTrainStd(); j["score_test_std"] = r.getScoreTestStd(); j["train_time"] = r.getTrainTime(); j["train_time_std"] = r.getTrainTimeStd(); j["test_time"] = r.getTestTime(); j["test_time_std"] = r.getTestTimeStd(); + j["time"] = r.getTestTime() + r.getTrainTime(); + j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd(); + j["nodes"] = r.getNodes(); + j["leaves"] = r.getLeaves(); + j["depth"] = r.getDepth(); result["results"].push_back(j); } return result; @@ -69,11 +88,16 @@ namespace platform { ); auto Xt = torch::transpose(X, 0, 1); auto result = Result(); + auto [values, counts] = at::_unique(y); + result.setSamples(X.size(0)).setFeatures(X.size(1)).setClasses(values.size(0)); auto k = fold->getNumberOfFolds(); auto accuracy_test = torch::zeros({ k }, torch::kFloat64); auto accuracy_train = torch::zeros({ k }, torch::kFloat64); auto train_time = torch::zeros({ k }, torch::kFloat64); auto test_time = torch::zeros({ k }, torch::kFloat64); + auto nodes = torch::zeros({ k }, torch::kFloat64); + auto edges = torch::zeros({ k }, torch::kFloat64); + auto num_states = torch::zeros({ k }, torch::kFloat64); Timer train_timer, test_timer; for (int i = 0; i < k; i++) { bayesnet::BaseClassifier* model = classifiers[model_name]; @@ -86,6 +110,9 @@ namespace platform { auto X_test = Xt.index({ "...", test_t }); auto y_test = y.index({ test_t }); model->fit(X_train, y_train, features, className, states); + nodes[i] = model->getNumberOfNodes(); + edges[i] = model->getNumberOfEdges(); + num_states[i] = model->getNumberOfStates(); cout << "Training Fold " << i + 1 << endl; cout << "X_train: " << X_train.sizes() << endl; cout << "y_train: " << y_train.sizes() << endl; @@ -102,6 +129,7 @@ namespace platform { result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); result.setScoreTestStd(torch::std(accuracy_test).item()).setScoreTrainStd(torch::std(accuracy_train).item()); result.setTrainTime(torch::mean(train_time).item()).setTestTime(torch::mean(test_time).item()); + result.setNodes(torch::mean(nodes).item()).setLeaves(torch::mean(edges).item()).setDepth(torch::mean(num_states).item()); return result; } } \ No newline at end of file diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index ce68052..bfd3b5f 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -20,13 +20,14 @@ namespace platform { Timer() = default; ~Timer() = default; void start() { begin = chrono::high_resolution_clock::now(); } - float getDuration() { return chrono::duration_cast(chrono::high_resolution_clock::now() - begin).count(); } + float getDuration() { return chrono::duration_cast(chrono::high_resolution_clock::now() - begin).count() / 1000; } }; class Result { private: string dataset, hyperparameters; int samples, features, classes; float score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std; + float nodes, leaves, depth; public: Result() = default; Result& setDataset(string dataset) { this->dataset = dataset; return *this; } @@ -42,6 +43,9 @@ namespace platform { Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; } Result& setTestTime(float test_time) { this->test_time = test_time; return *this; } Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; } + Result& setNodes(float nodes) { this->nodes = nodes; return *this; } + Result& setLeaves(float leaves) { this->leaves = leaves; return *this; } + Result& setDepth(float depth) { this->depth = depth; return *this; } const float get_score_train() const { return score_train; } float get_score_test() { return score_test; } const string& getDataset() const { return dataset; } @@ -57,10 +61,13 @@ namespace platform { const float getTrainTimeStd() const { return train_time_std; } const float getTestTime() const { return test_time; } const float getTestTimeStd() const { return test_time_std; } + const float getNodes() const { return nodes; } + const float getLeaves() const { return leaves; } + const float getDepth() const { return depth; } }; class Experiment { private: - string title, model, platform, score_name, model_version, language_version; + string title, model, platform, score_name, model_version, language_version, language; bool discretized, stratified; vector results; vector random_seeds; @@ -74,6 +81,7 @@ namespace platform { Experiment& setPlatform(string platform) { this->platform = platform; return *this; } Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; } Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; } + Experiment& setLanguage(string language) { this->language = language; return *this; } Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; } Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } diff --git a/src/Platform/main.cc b/src/Platform/main.cc index b2f78b7..d54d8ea 100644 --- a/src/Platform/main.cc +++ b/src/Platform/main.cc @@ -18,8 +18,6 @@ using namespace std; - - int main(int argc, char** argv) { map datasets = { @@ -60,6 +58,7 @@ int main(int argc, char** argv) throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}"); } ); + program.add_argument("--title").required().help("Experiment title"); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { @@ -79,7 +78,7 @@ int main(int argc, char** argv) program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); bool class_last, discretize_dataset, stratified; int n_folds, seed; - string model_name, file_name, path, complete_file_name; + string model_name, file_name, path, complete_file_name, title; try { program.parse_args(argc, argv); file_name = program.get("dataset"); @@ -91,6 +90,7 @@ int main(int argc, char** argv) seed = program.get("seed"); complete_file_name = path + file_name + ".arff"; class_last = datasets[file_name]; + title = program.get("title"); if (!file_exists(complete_file_name)) { throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); } @@ -110,7 +110,8 @@ int main(int argc, char** argv) else fold = new KFold(n_folds, y.numel(), seed); auto experiment = platform::Experiment(); - experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp"); + experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0"); + experiment.setDiscretized(discretize_dataset).setModel(model_name).setModelVersion("1...0").setPlatform("BayesNet"); experiment.setStratified(stratified).setNFolds(n_folds).addRandomSeed(seed).setScoreName("accuracy"); platform::Timer timer; timer.start();