Solve Ensemble models exceptions on certain datasets #7

Merged
rmontanana merged 3 commits from solveexceptions into main 2023-09-02 15:29:34 +00:00
15 changed files with 33 additions and 67 deletions

9
.vscode/launch.json vendored
View File

@ -25,16 +25,17 @@
"program": "${workspaceFolder}/build/src/Platform/main",
"args": [
"-m",
"AODELd",
"AODE",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets",
"/home/rmontanana/Code/discretizbench/datasets",
"--stratified",
"-d",
"wine"
"mfeat-morphological",
"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
"type": "lldb",

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 0.1.0
VERSION 0.2.0
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
@ -40,8 +40,7 @@ if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)

View File

@ -1 +0,0 @@
null

BIN
diagrams/BayesNet.pdf Executable file

Binary file not shown.

View File

@ -25,7 +25,7 @@ namespace bayesnet {
int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.1.0"; };
const string inline getVersion() const { return "0.2.0"; };
vector<string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;

View File

@ -13,7 +13,7 @@ namespace bayesnet {
m = dataset.size(1);
n = dataset.size(0) - 1;
checkFitParameters();
auto n_classes = states[className].size();
auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes);
model.initialize();
buildModel(weights);

View File

@ -17,9 +17,13 @@ namespace bayesnet {
{
auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final;
int numClasses = states.at(className).size();
// y_pred is m x n_models with the prediction of every model for each sample
for (int i = 0; i < y_pred.size(0); ++i) {
vector<double> votes(y_pred.size(1), 0);
for (int j = 0; j < y_pred.size(1); ++j) {
// votes store in each index (value of class) the significance added by each model
// i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
vector<double> votes(numClasses, 0.0);
for (int j = 0; j < n_models; ++j) {
votes[y_pred_[i][j]] += significanceModels[j];
}
// argsort in descending order
@ -34,7 +38,6 @@ namespace bayesnet {
throw logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
//Create a threadpool
auto threads{ vector<thread>() };
mutex mtx;
for (auto i = 0; i < n_models; ++i) {

View File

@ -174,42 +174,10 @@ namespace bayesnet {
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) {
maxThreadsRunning = 1;
for (auto& node : nodes) {
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
fitted = true;
}
vector<thread> threads;
mutex mtx;
condition_variable cv;
int activeThreads = 0;
int nextNodeIndex = 0;
while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() {
while (true) {
unique_lock<mutex> lock(mtx);
if (nextNodeIndex >= nodes.size()) {
break; // No more work remaining
}
auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex;
lock.unlock();
pair.second->computeCPT(samples, features, laplaceSmoothing, weights);
lock.lock();
nodes[pair.first] = std::move(pair.second);
lock.unlock();
}
lock_guard<mutex> lock(mtx);
--activeThreads;
cv.notify_one();
});
++activeThreads;
}
for (auto& thread : threads) {
thread.join();
}
fitted = true;
}
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{

View File

@ -27,6 +27,7 @@ namespace bayesnet {
Network();
explicit Network(float);
explicit Network(Network&);
~Network() = default;
torch::Tensor& getSamples();
float getmaxThreads();
void addNode(const string&);
@ -52,7 +53,7 @@ namespace bayesnet {
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
void initialize();
void dump_cpt() const;
inline string version() { return "0.1.0"; }
inline string version() { return "0.2.0"; }
};
}
#endif

View File

@ -100,7 +100,7 @@ namespace bayesnet {
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates;
c10::List<c10::optional<at::Tensor>> coordinates;
coordinates.push_back(dataset.index({ name_index, n_sample }));
for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
@ -118,10 +118,10 @@ namespace bayesnet {
}
float Node::getFactorValue(map<string, int>& evidence)
{
torch::List<c10::optional<torch::Tensor>> coordinates;
c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(torch::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); });
coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>();
}
vector<string> Node::graph(const string& className)

View File

@ -53,15 +53,6 @@ namespace bayesnet {
auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv);
//
//
//
// auto tmp = discretizers[feature]->transform(xvf);
// Xv[index] = tmp;
// auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
// iota(xStates.begin(), xStates.end(), 0);
// //Update new states of the feature/node
// states[feature] = xStates;
}
if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers

View File

@ -213,10 +213,11 @@ namespace platform {
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
}
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
iota(begin(states.at(className)), end(states.at(className)), 0);
}
void Dataset::load_arff()
{

View File

@ -179,8 +179,10 @@ namespace platform {
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
clf.reset();
}
cout << "end. " << flush;
delete fold;
}
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());

View File

@ -26,7 +26,7 @@ namespace platform {
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return shared_ptr<bayesnet::BaseClassifier>(instance);
return unique_ptr<bayesnet::BaseClassifier>(instance);
else
return nullptr;
}

View File

@ -69,11 +69,12 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else {
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {