Fix some mistakes in tensors treatment
This commit is contained in:
parent
be06e475f0
commit
099b4bea09
20
.vscode/launch.json
vendored
20
.vscode/launch.json
vendored
@ -4,13 +4,19 @@
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "bayesnet",
|
||||
"program": "${workspaceFolder}/build/sample/main",
|
||||
"name": "sample",
|
||||
"program": "${workspaceFolder}/build/sample/BayesNetSample",
|
||||
"args": [
|
||||
"-f",
|
||||
"iris"
|
||||
"-d",
|
||||
"iris",
|
||||
"-m",
|
||||
"TAN",
|
||||
"-p",
|
||||
"../../data/",
|
||||
"--stratified",
|
||||
"--tensors"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"cwd": "${workspaceFolder}/build/sample/",
|
||||
"preLaunchTask": "CMake: build"
|
||||
},
|
||||
{
|
||||
@ -25,7 +31,9 @@
|
||||
"TAN",
|
||||
"-p",
|
||||
"../../../data/",
|
||||
"--discretize"
|
||||
"--discretize",
|
||||
"-f",
|
||||
"2"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build/src/Platform",
|
||||
},
|
||||
|
@ -3,5 +3,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
|
||||
add_executable(BayesNetSample sample.cc)
|
||||
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc)
|
||||
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
@ -1,4 +1,5 @@
|
||||
#include <iostream>
|
||||
#include <torch/torch.h>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <map>
|
||||
@ -12,6 +13,7 @@
|
||||
#include "SPODE.h"
|
||||
#include "AODE.h"
|
||||
#include "TAN.h"
|
||||
#include "Folding.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
@ -42,6 +44,21 @@ bool file_exists(const std::string& name)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
/**
 * Build the subset of a dataset selected by a list of sample indices.
 *
 * The dataset is stored feature-major: X[col][index] is the value of feature
 * `col` for sample `index`, and y[index] is the label of that sample.
 *
 * @param indices sample positions to keep, in the order they must appear in
 *                the output. Positions are not bounds-checked; each one must
 *                be valid for y and for every column of X.
 * @param X       feature-major sample matrix.
 * @param y       label of every sample.
 * @return pair {Xr, yr}: Xr has one column per column of X, each holding the
 *         selected samples in `indices` order; yr holds the matching labels.
 *
 * Inputs are taken by const reference so that the whole dataset is not
 * deep-copied on every call (this runs twice per cross-validation fold).
 */
std::pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(const std::vector<int>& indices, const std::vector<std::vector<int>>& X, const std::vector<int>& y)
{
    // One (initially empty) output column per input feature.
    std::vector<std::vector<int>> Xr(X.size());
    for (auto& column : Xr) {
        column.reserve(indices.size());
    }
    std::vector<int> yr;
    yr.reserve(indices.size());
    for (const auto index : indices) {
        for (std::size_t col = 0; col < X.size(); ++col) {
            Xr[col].push_back(X[col][index]);
        }
        yr.push_back(y[index]);
    }
    // Move the results into the pair instead of copying them once more.
    return { std::move(Xr), std::move(yr) };
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
@ -60,7 +77,7 @@ int main(int argc, char** argv)
|
||||
valid_datasets.push_back(dataset.first);
|
||||
}
|
||||
argparse::ArgumentParser program("BayesNetSample");
|
||||
program.add_argument("-f", "--file")
|
||||
program.add_argument("-d", "--dataset")
|
||||
.help("Dataset file name")
|
||||
.action([valid_datasets](const std::string& value) {
|
||||
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
|
||||
@ -83,14 +100,37 @@ int main(int argc, char** argv)
|
||||
throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}");
|
||||
}
|
||||
);
|
||||
bool class_last;
|
||||
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
|
||||
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
|
||||
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
|
||||
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
|
||||
try {
|
||||
auto k = stoi(value);
|
||||
if (k < 2) {
|
||||
throw runtime_error("Number of folds must be greater than 1");
|
||||
}
|
||||
return k;
|
||||
}
|
||||
catch (const runtime_error& err) {
|
||||
throw runtime_error(err.what());
|
||||
}
|
||||
catch (...) {
|
||||
throw runtime_error("Number of folds must be an integer");
|
||||
}});
|
||||
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
|
||||
bool class_last, stratified, tensors;
|
||||
string model_name, file_name, path, complete_file_name;
|
||||
int nFolds, seed;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
file_name = program.get<string>("file");
|
||||
file_name = program.get<string>("dataset");
|
||||
path = program.get<string>("path");
|
||||
model_name = program.get<string>("model");
|
||||
complete_file_name = path + file_name + ".arff";
|
||||
stratified = program.get<bool>("stratified");
|
||||
tensors = program.get<bool>("tensors");
|
||||
nFolds = program.get<int>("folds");
|
||||
seed = program.get<int>("seed");
|
||||
class_last = datasets[file_name];
|
||||
if (!file_exists(complete_file_name)) {
|
||||
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
||||
@ -144,5 +184,55 @@ int main(int argc, char** argv)
|
||||
file.close();
|
||||
cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
|
||||
cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
|
||||
string stratified_string = stratified ? " Stratified" : "";
|
||||
cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
|
||||
cout << "==========================================" << endl;
|
||||
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
|
||||
torch::Tensor yt = torch::tensor(y, torch::kInt32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
||||
}
|
||||
float total_score = 0, total_score_train = 0, score_train, score_test;
|
||||
Fold* fold;
|
||||
if (stratified)
|
||||
fold = new StratifiedKFold(nFolds, y, seed);
|
||||
else
|
||||
fold = new KFold(nFolds, y.size(), seed);
|
||||
for (auto i = 0; i < nFolds; ++i) {
|
||||
auto [train, test] = fold->getFold(i);
|
||||
cout << "Fold: " << i + 1 << endl;
|
||||
if (tensors) {
|
||||
cout << "Xt shape: " << Xt.sizes() << endl;
|
||||
cout << "yt shape: " << yt.sizes() << endl;
|
||||
auto ttrain = torch::tensor(train, torch::kInt64);
|
||||
auto ttest = torch::tensor(test, torch::kInt64);
|
||||
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
|
||||
torch::Tensor ytraint = yt.index({ ttrain });
|
||||
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
||||
torch::Tensor ytestt = yt.index({ ttest });
|
||||
cout << "Train: " << Xtraint.size(0) << " x " << Xtraint.size(1) << " " << ytraint.size(0) << endl;
|
||||
cout << "Test : " << Xtestt.size(0) << " x " << Xtestt.size(1) << " " << ytestt.size(0) << endl;
|
||||
clf->fit(Xtraint, ytraint, features, className, states);
|
||||
score_train = clf->score(Xtraint, ytraint);
|
||||
score_test = clf->score(Xtestt, ytestt);
|
||||
} else {
|
||||
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
||||
auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
||||
cout << "Train: " << Xtrain.size() << " x " << Xtrain[0].size() << " " << ytrain.size() << endl;
|
||||
cout << "Test : " << Xtest.size() << " x " << Xtest[0].size() << " " << ytest.size() << endl;
|
||||
clf->fit(Xtrain, ytrain, features, className, states);
|
||||
score_train = clf->score(Xtrain, ytrain);
|
||||
score_test = clf->score(Xtest, ytest);
|
||||
}
|
||||
total_score_train += score_train;
|
||||
total_score += score_test;
|
||||
cout << "Score Train: " << score_train << endl;
|
||||
cout << "Score Test : " << score_test << endl;
|
||||
// cout << "-------------------------------------------------------------------------------" << endl;
|
||||
// total_score += score_value;
|
||||
}
|
||||
cout << "**********************************************************************************" << endl;
|
||||
cout << "Average Score Train: " << total_score_train / nFolds << endl;
|
||||
cout << "Average Score Test : " << total_score / nFolds << endl;
|
||||
return 0;
|
||||
}
|
@ -13,11 +13,11 @@ namespace bayesnet {
|
||||
, className(className)
|
||||
, classNumStates(classNumStates)
|
||||
{
|
||||
samples = torch::zeros({ static_cast<int64_t>(vsamples[0].size()), static_cast<int64_t>(vsamples.size() + 1) }, torch::kInt64);
|
||||
samples = torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32);
|
||||
for (int i = 0; i < vsamples.size(); ++i) {
|
||||
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64));
|
||||
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32));
|
||||
}
|
||||
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
|
||||
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
|
||||
}
|
||||
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
|
||||
{
|
||||
@ -43,8 +43,8 @@ namespace bayesnet {
|
||||
margin[value] = mask.sum().item<float>() / samples.sizes()[0];
|
||||
}
|
||||
for (auto [first, second] : combinations) {
|
||||
int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
|
||||
int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
|
||||
int index_first = find(features.begin(), features.end(), first) - features.begin();
|
||||
int index_second = find(features.begin(), features.end(), second) - features.begin();
|
||||
double accumulated = 0;
|
||||
for (int value = 0; value < classNumStates; ++value) {
|
||||
auto mask = samples.index({ "...", -1 }) == value;
|
||||
|
@ -25,7 +25,7 @@ namespace bayesnet {
|
||||
}
|
||||
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
|
||||
{
|
||||
this->X = X;
|
||||
this->X = torch::transpose(X, 0, 1);
|
||||
this->y = y;
|
||||
Xv = vector<vector<int>>();
|
||||
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
|
||||
@ -34,12 +34,12 @@ namespace bayesnet {
|
||||
|
||||
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
|
||||
{
|
||||
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
|
||||
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
|
||||
Xv = X;
|
||||
for (int i = 0; i < X.size(); ++i) {
|
||||
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
|
||||
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
|
||||
}
|
||||
this->y = torch::tensor(y, kInt64);
|
||||
this->y = torch::tensor(y, kInt32);
|
||||
yv = y;
|
||||
return build(features, className, states);
|
||||
}
|
||||
@ -77,7 +77,7 @@ namespace bayesnet {
|
||||
Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + m_);
|
||||
}
|
||||
auto yp = model.predict(Xd);
|
||||
auto ypred = torch::tensor(yp, torch::kInt64);
|
||||
auto ypred = torch::tensor(yp, torch::kInt32);
|
||||
return ypred;
|
||||
}
|
||||
vector<int> Classifier::predict(vector<vector<int>>& X)
|
||||
@ -121,6 +121,7 @@ namespace bayesnet {
|
||||
}
|
||||
void Classifier::addNodes()
|
||||
{
|
||||
auto test = model.getEdges();
|
||||
// Add all nodes to the network
|
||||
for (auto feature : features) {
|
||||
model.addNode(feature, states[feature].size());
|
||||
|
@ -32,12 +32,12 @@ namespace bayesnet {
|
||||
}
|
||||
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
|
||||
{
|
||||
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
|
||||
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
|
||||
Xv = X;
|
||||
for (int i = 0; i < X.size(); ++i) {
|
||||
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
|
||||
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
|
||||
}
|
||||
this->y = torch::tensor(y, kInt64);
|
||||
this->y = torch::tensor(y, kInt32);
|
||||
yv = y;
|
||||
return build(features, className, states);
|
||||
}
|
||||
@ -46,7 +46,7 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw logic_error("Ensemble has not been fitted");
|
||||
}
|
||||
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64);
|
||||
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt32);
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
y_pred.index_put_({ "...", i }, models[i]->predict(X));
|
||||
}
|
||||
@ -54,7 +54,7 @@ namespace bayesnet {
|
||||
}
|
||||
vector<int> Ensemble::voting(Tensor& y_pred)
|
||||
{
|
||||
auto y_pred_ = y_pred.accessor<int64_t, 2>();
|
||||
auto y_pred_ = y_pred.accessor<int, 2>();
|
||||
vector<int> y_pred_final;
|
||||
for (int i = 0; i < y_pred.size(0); ++i) {
|
||||
vector<float> votes(states[className].size(), 0);
|
||||
@ -77,9 +77,9 @@ namespace bayesnet {
|
||||
for (auto i = 0; i < n_; i++) {
|
||||
Xd[i] = vector<int>(X[i].begin(), X[i].end());
|
||||
}
|
||||
Tensor y_pred = torch::zeros({ m_, n_models }, kInt64);
|
||||
Tensor y_pred = torch::zeros({ m_, n_models }, kInt32);
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64));
|
||||
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32));
|
||||
}
|
||||
return voting(y_pred);
|
||||
}
|
||||
|
@ -26,7 +26,8 @@ namespace bayesnet {
|
||||
features.push_back(name);
|
||||
}
|
||||
if (nodes.find(name) != nodes.end()) {
|
||||
// if node exists update its number of states
|
||||
// if node exists update its number of states and remove parents, children and CPT
|
||||
nodes[name]->clear();
|
||||
nodes[name]->setNumStates(numStates);
|
||||
return;
|
||||
}
|
||||
@ -88,7 +89,6 @@ namespace bayesnet {
|
||||
nodes[child]->removeParent(nodes[parent].get());
|
||||
throw invalid_argument("Adding this edge forms a cycle in the graph.");
|
||||
}
|
||||
|
||||
}
|
||||
map<string, std::unique_ptr<Node>>& Network::getNodes()
|
||||
{
|
||||
@ -96,23 +96,71 @@ namespace bayesnet {
|
||||
}
|
||||
void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& featureNames, const string& className)
|
||||
{
|
||||
this->fit(tensorToVector(X), vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)), featureNames, className);
|
||||
features = featureNames;
|
||||
this->className = className;
|
||||
dataset.clear();
|
||||
classNumStates = torch::max(y).item<int>() + 1;
|
||||
samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1);
|
||||
for (int i = 0; i < featureNames.size(); ++i) {
|
||||
auto column = torch::flatten(X.index({ "...", i }));
|
||||
auto k = vector<int>();
|
||||
for (auto i = 0; i < X.size(0); ++i) {
|
||||
k.push_back(column[i].item<int>());
|
||||
}
|
||||
dataset[featureNames[i]] = k;
|
||||
}
|
||||
dataset[className] = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
|
||||
// //
|
||||
// // Check if data is ok
|
||||
// cout << "******************************************************************" << endl;
|
||||
// cout << "Check samples, sizes: " << samples.sizes() << endl;
|
||||
// for (auto i = 0; i < features.size(); ++i) {
|
||||
// cout << featureNames[i] << ": " << nodes[featureNames[i]]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", i })).item<int>() + 1 << " dataset" << *max_element(dataset[featureNames[i]].begin(), dataset[featureNames[i]].end()) + 1 << endl;
|
||||
// }
|
||||
// cout << className << ": " << nodes[className]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", -1 })) + 1 << endl;
|
||||
// cout << "******************************************************************" << endl;
|
||||
// //
|
||||
// //
|
||||
/*
|
||||
|
||||
|
||||
*/
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
cout << "Checking " << features[i] << endl;
|
||||
auto column = torch::flatten(X.index({ "...", i }));
|
||||
auto k = vector<int>();
|
||||
for (auto i = 0; i < X.size(0); ++i) {
|
||||
k.push_back(column[i].item<int>());
|
||||
}
|
||||
if (k != dataset[features[i]]) {
|
||||
throw invalid_argument("Dataset and samples do not match");
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
||||
|
||||
*/
|
||||
completeFit();
|
||||
}
|
||||
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
|
||||
{
|
||||
features = featureNames;
|
||||
this->className = className;
|
||||
dataset.clear();
|
||||
|
||||
// Build dataset & tensor of samples
|
||||
samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
|
||||
samples = torch::zeros({ static_cast<int>(input_data[0].size()), static_cast<int>(input_data.size() + 1) }, torch::kInt32);
|
||||
for (int i = 0; i < featureNames.size(); ++i) {
|
||||
dataset[featureNames[i]] = input_data[i];
|
||||
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
|
||||
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt32));
|
||||
}
|
||||
dataset[className] = labels;
|
||||
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
|
||||
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
|
||||
classNumStates = *max_element(labels.begin(), labels.end()) + 1;
|
||||
completeFit();
|
||||
}
|
||||
void Network::completeFit()
|
||||
{
|
||||
|
||||
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
|
||||
if (maxThreadsRunning < 1) {
|
||||
maxThreadsRunning = 1;
|
||||
@ -122,15 +170,12 @@ namespace bayesnet {
|
||||
condition_variable cv;
|
||||
int activeThreads = 0;
|
||||
int nextNodeIndex = 0;
|
||||
|
||||
while (nextNodeIndex < nodes.size()) {
|
||||
unique_lock<mutex> lock(mtx);
|
||||
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
|
||||
|
||||
if (nextNodeIndex >= nodes.size()) {
|
||||
break; // No more work remaining
|
||||
}
|
||||
|
||||
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() {
|
||||
while (true) {
|
||||
unique_lock<mutex> lock(mtx);
|
||||
@ -140,7 +185,6 @@ namespace bayesnet {
|
||||
auto& pair = *std::next(nodes.begin(), nextNodeIndex);
|
||||
++nextNodeIndex;
|
||||
lock.unlock();
|
||||
|
||||
pair.second->computeCPT(dataset, laplaceSmoothing);
|
||||
lock.lock();
|
||||
nodes[pair.first] = std::move(pair.second);
|
||||
@ -150,7 +194,6 @@ namespace bayesnet {
|
||||
--activeThreads;
|
||||
cv.notify_one();
|
||||
});
|
||||
|
||||
++activeThreads;
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
@ -218,7 +261,6 @@ namespace bayesnet {
|
||||
evidence[features[i]] = sample[i];
|
||||
}
|
||||
return exactInference(evidence);
|
||||
|
||||
}
|
||||
double Network::computeFactor(map<string, int>& completeEvidence)
|
||||
{
|
||||
@ -292,5 +334,4 @@ namespace bayesnet {
|
||||
}
|
||||
return edges;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ namespace bayesnet {
|
||||
double entropy(torch::Tensor&);
|
||||
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
|
||||
double mutualInformation(torch::Tensor&, torch::Tensor&);
|
||||
void completeFit();
|
||||
public:
|
||||
Network();
|
||||
Network(float, int);
|
||||
|
@ -6,6 +6,14 @@ namespace bayesnet {
|
||||
: name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
|
||||
{
|
||||
}
|
||||
void Node::clear()
|
||||
{
|
||||
parents.clear();
|
||||
children.clear();
|
||||
cpTable = torch::Tensor();
|
||||
dimensions.clear();
|
||||
numStates = 0;
|
||||
}
|
||||
string Node::getName() const
|
||||
{
|
||||
return name;
|
||||
|
@ -17,6 +17,7 @@ namespace bayesnet {
|
||||
public:
|
||||
vector<pair<string, string>> combinations(const vector<string>&);
|
||||
Node(const std::string&, int);
|
||||
void clear();
|
||||
void addParent(Node*);
|
||||
void addChild(Node*);
|
||||
void removeParent(Node*);
|
||||
|
@ -3,7 +3,7 @@
|
||||
namespace bayesnet {
|
||||
using namespace torch;
|
||||
|
||||
TAN::TAN() : Classifier(Network()) {}
|
||||
TAN::TAN() : Classifier(Network(0.1)) {}
|
||||
|
||||
void TAN::train()
|
||||
{
|
||||
|
@ -18,7 +18,7 @@ namespace bayesnet {
|
||||
// Iterate over cols
|
||||
for (int i = 0; i < tensor.size(1); ++i) {
|
||||
auto col_tensor = tensor.index({ "...", i });
|
||||
auto col = vector<int>(col_tensor.data_ptr<int64_t>(), col_tensor.data_ptr<int64_t>() + tensor.size(0));
|
||||
auto col = vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
|
||||
result.push_back(col);
|
||||
}
|
||||
return result;
|
||||
|
@ -5,4 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
|
||||
add_executable(main Experiment.cc Folding.cc platformUtils.cc)
|
||||
add_executable(testx testx.cpp Folding.cc)
|
||||
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
||||
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
||||
target_link_libraries(testx ArffFiles mdlp "${TORCH_LIBRARIES}")
|
@ -18,8 +18,13 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
|
||||
Result cross_validation(Fold* fold, string model_name, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
|
||||
{
|
||||
auto classifiers = map<string, bayesnet::BaseClassifier*>({
|
||||
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
|
||||
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
|
||||
}
|
||||
);
|
||||
auto result = Result();
|
||||
auto k = fold->getNumberOfFolds();
|
||||
auto accuracy = torch::zeros({ k }, kFloat64);
|
||||
@ -27,6 +32,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X,
|
||||
auto test_time = torch::zeros({ k }, kFloat64);
|
||||
Timer train_timer, test_timer;
|
||||
for (int i = 0; i < k; i++) {
|
||||
bayesnet::BaseClassifier* model = classifiers[model_name];
|
||||
train_timer.start();
|
||||
auto [train, test] = fold->getFold(i);
|
||||
auto train_t = torch::tensor(train);
|
||||
@ -43,8 +49,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X,
|
||||
cout << "y_test: " << y_test.sizes() << endl;
|
||||
train_time[i] = train_timer.getDuration();
|
||||
test_timer.start();
|
||||
//auto acc = model->score(X_test, y_test);
|
||||
auto acc = 7;
|
||||
auto acc = model->score(X_test, y_test);
|
||||
test_time[i] = test_timer.getDuration();
|
||||
accuracy[i] = acc;
|
||||
}
|
||||
@ -140,18 +145,16 @@ int main(int argc, char** argv)
|
||||
fold = new StratifiedKFold(n_folds, y, -1);
|
||||
else
|
||||
fold = new KFold(n_folds, y.numel(), -1);
|
||||
auto classifiers = map<string, bayesnet::BaseClassifier*>({
|
||||
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
|
||||
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
|
||||
}
|
||||
);
|
||||
|
||||
auto experiment = Experiment();
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
|
||||
experiment.setStratified(stratified).setNFolds(5).addRandomSeed(271).setScoreName("accuracy");
|
||||
bayesnet::BaseClassifier* model = classifiers[model_name];
|
||||
auto result = cross_validation(fold, model, X, y, features, className, states);
|
||||
auto result = cross_validation(fold, model_name, X, y, features, className, states);
|
||||
result.setDataset(file_name);
|
||||
experiment.addResult(result);
|
||||
experiment.save(path);
|
||||
for (auto& item : states) {
|
||||
cout << item.first << ": " << item.second.size() << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,13 +1,16 @@
|
||||
#include "Folding.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <random>
|
||||
// Store the fold parameters and seed both random sources used by this class:
// the `random_seed` engine member and the C library generator (srand).
// A seed of -1 means "not reproducible": the engine is seeded from
// random_device and srand from the current time. Any other value seeds both
// generators with that value.
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
    random_device entropy;
    if (seed == -1) {
        random_seed = default_random_engine(entropy());
        srand(time(0));
    } else {
        // Convert through random_device's unsigned result type before seeding the engine.
        random_seed = default_random_engine(static_cast<random_device::result_type>(seed));
        srand(seed);
    }
}
|
||||
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed)
|
||||
{
|
||||
indices = vector<int>(n);
|
||||
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
|
||||
random_device rd;
|
||||
default_random_engine random_seed(seed == -1 ? rd() : seed);
|
||||
shuffle(indices.begin(), indices.end(), random_seed);
|
||||
}
|
||||
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
|
||||
@ -54,8 +57,6 @@ void StratifiedKFold::build()
|
||||
class_indices[y[i]].push_back(i);
|
||||
}
|
||||
// Shuffle class indices
|
||||
random_device rd;
|
||||
default_random_engine random_seed(seed == -1 ? rd() : seed);
|
||||
for (auto& [cls, indices] : class_indices) {
|
||||
shuffle(indices.begin(), indices.end(), random_seed);
|
||||
}
|
||||
@ -71,7 +72,7 @@ void StratifiedKFold::build()
|
||||
class_indices[label].erase(class_indices[label].begin(), it);
|
||||
}
|
||||
while (remainder_samples_to_take > 0) {
|
||||
int fold = (arc4random() % static_cast<int>(k));
|
||||
int fold = (rand() % static_cast<int>(k));
|
||||
if (stratified_indices[fold].size() == fold_size) {
|
||||
continue;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define FOLDING_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
using namespace std;
|
||||
|
||||
class Fold {
|
||||
@ -9,8 +10,9 @@ protected:
|
||||
int k;
|
||||
int n;
|
||||
int seed;
|
||||
default_random_engine random_seed;
|
||||
public:
|
||||
Fold(int k, int n, int seed = -1) : k(k), n(n), seed(seed) {}
|
||||
Fold(int k, int n, int seed = -1);
|
||||
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
|
||||
virtual ~Fold() = default;
|
||||
int getNumberOfFolds() { return k; }
|
||||
|
@ -55,16 +55,16 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
|
||||
auto states = map<string, vector<int>>();
|
||||
if (discretize_dataset) {
|
||||
auto Xr = discretizeDataset(X, y);
|
||||
Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt64);
|
||||
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
iota(begin(states[features[i]]), end(states[features[i]]), 0);
|
||||
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64));
|
||||
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
|
||||
}
|
||||
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
iota(begin(states[className]), end(states[className]), 0);
|
||||
} else {
|
||||
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
|
||||
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
|
||||
}
|
||||
|
@ -62,14 +62,40 @@ int main()
|
||||
cout << endl;
|
||||
cout << "Test Statistics: " << counts(y, test);
|
||||
cout << "==============================================================================" << endl;
|
||||
torch::Tensor a = torch::zeros({ 5, 3 });
|
||||
torch::Tensor b = torch::zeros({ 5 }) + 1;
|
||||
torch::Tensor c = torch::cat({ a, b.view({5, 1}) }, 1);
|
||||
cout << "a:" << a.sizes() << endl;
|
||||
cout << a << endl;
|
||||
cout << "b:" << b.sizes() << endl;
|
||||
cout << b << endl;
|
||||
cout << "c:" << c.sizes() << endl;
|
||||
cout << c << endl;
|
||||
torch::Tensor d = torch::zeros({ 5, 3 });
|
||||
torch::Tensor e = torch::tensor({ 1,2,3,4,5 }) + 1;
|
||||
torch::Tensor f = torch::cat({ d, e.view({5, 1}) }, 1);
|
||||
cout << "d:" << d.sizes() << endl;
|
||||
cout << d << endl;
|
||||
cout << "e:" << e.sizes() << endl;
|
||||
cout << e << endl;
|
||||
cout << "f:" << f.sizes() << endl;
|
||||
cout << f << endl;
|
||||
auto indices = torch::tensor({ 0, 2, 4 });
|
||||
auto k = f.index({ indices, "..." });
|
||||
cout << "k:" << k.sizes() << endl;
|
||||
cout << k << endl;
|
||||
auto w = torch::index_select(f, 0, indices);
|
||||
cout << "w:" << w.sizes() << endl;
|
||||
cout << w << endl;
|
||||
|
||||
// cout << "Vector poly" << endl;
|
||||
// auto some = vector<A>();
|
||||
// auto cx = C(5, 4);
|
||||
// auto bx = B(7, 6);
|
||||
// some.push_back(cx);
|
||||
// some.push_back(bx);
|
||||
// for (auto& obj : some) {
|
||||
// cout << "Obj :" << obj.getA() << endl;
|
||||
// }
|
||||
// auto some = vector<A>();
|
||||
// auto cx = C(5, 4);
|
||||
// auto bx = B(7, 6);
|
||||
// some.push_back(cx);
|
||||
// some.push_back(bx);
|
||||
// for (auto& obj : some) {
|
||||
// cout << "Obj :" << obj.getA() << endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user