Fix some mistakes in tensors treatment

This commit is contained in:
Ricardo Montañana Gómez 2023-07-26 01:39:01 +02:00
parent be06e475f0
commit 099b4bea09
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
18 changed files with 255 additions and 72 deletions

20
.vscode/launch.json vendored
View File

@ -4,13 +4,19 @@
{
"type": "lldb",
"request": "launch",
"name": "bayesnet",
"program": "${workspaceFolder}/build/sample/main",
"name": "sample",
"program": "${workspaceFolder}/build/sample/BayesNetSample",
"args": [
"-f",
"iris"
"-d",
"iris",
"-m",
"TAN",
"-p",
"../../data/",
"--stratified",
"--tensors"
],
"cwd": "${workspaceFolder}",
"cwd": "${workspaceFolder}/build/sample/",
"preLaunchTask": "CMake: build"
},
{
@ -25,7 +31,9 @@
"TAN",
"-p",
"../../../data/",
"--discretize"
"--discretize",
"-f",
"2"
],
"cwd": "${workspaceFolder}/build/src/Platform",
},

View File

@ -3,5 +3,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
add_executable(BayesNetSample sample.cc)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@ -1,4 +1,5 @@
#include <iostream>
#include <torch/torch.h>
#include <string>
#include <thread>
#include <map>
@ -12,6 +13,7 @@
#include "SPODE.h"
#include "AODE.h"
#include "TAN.h"
#include "Folding.h"
using namespace std;
@ -42,6 +44,21 @@ bool file_exists(const std::string& name)
return false;
}
}
/**
 * Select a subset of samples from a feature-major dataset.
 *
 * @param indices sample positions to keep, in the order they should appear in the result
 * @param X       feature-major data: X[col][sample]
 * @param y       labels, one per sample
 * @return        pair {Xr, yr} where Xr has X.size() columns, each holding the selected
 *                samples in `indices` order, and yr holds the matching labels
 *
 * Every value in `indices` must be a valid position in y and in each column of X;
 * no bounds checking is done.
 *
 * Arguments are taken by const reference so the whole dataset is not copied on
 * every call (this function is called twice per fold).
 */
std::pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(const std::vector<int>& indices, const std::vector<std::vector<int>>& X, const std::vector<int>& y)
{
    // One (initially empty) output column per input column, sized up front.
    std::vector<std::vector<int>> Xr(X.size());
    for (auto& column : Xr) {
        column.reserve(indices.size());
    }
    std::vector<int> yr;
    yr.reserve(indices.size());
    for (const int index : indices) {
        for (std::size_t col = 0; col < X.size(); ++col) {
            Xr[col].push_back(X[col][index]);
        }
        yr.push_back(y[index]);
    }
    // Move the locals into the returned pair instead of copying them again.
    return { std::move(Xr), std::move(yr) };
}
int main(int argc, char** argv)
{
@ -60,7 +77,7 @@ int main(int argc, char** argv)
valid_datasets.push_back(dataset.first);
}
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-f", "--file")
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
@ -83,14 +100,37 @@ int main(int argc, char** argv)
throw runtime_error("Model must be one of {AODE, KDB, SPODE, TAN}");
}
);
bool class_last;
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors;
string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
program.parse_args(argc, argv);
file_name = program.get<string>("file");
file_name = program.get<string>("dataset");
path = program.get<string>("path");
model_name = program.get<string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
@ -144,5 +184,55 @@ int main(int argc, char** argv)
file.close();
cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
string stratified_string = stratified ? " Stratified" : "";
cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
cout << "==========================================" << endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
Fold* fold;
if (stratified)
fold = new StratifiedKFold(nFolds, y, seed);
else
fold = new KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
cout << "Fold: " << i + 1 << endl;
if (tensors) {
cout << "Xt shape: " << Xt.sizes() << endl;
cout << "yt shape: " << yt.sizes() << endl;
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
cout << "Train: " << Xtraint.size(0) << " x " << Xtraint.size(1) << " " << ytraint.size(0) << endl;
cout << "Test : " << Xtestt.size(0) << " x " << Xtestt.size(1) << " " << ytestt.size(0) << endl;
clf->fit(Xtraint, ytraint, features, className, states);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
cout << "Train: " << Xtrain.size() << " x " << Xtrain[0].size() << " " << ytrain.size() << endl;
cout << "Test : " << Xtest.size() << " x " << Xtest[0].size() << " " << ytest.size() << endl;
clf->fit(Xtrain, ytrain, features, className, states);
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
total_score_train += score_train;
total_score += score_test;
cout << "Score Train: " << score_train << endl;
cout << "Score Test : " << score_test << endl;
// cout << "-------------------------------------------------------------------------------" << endl;
// total_score += score_value;
}
cout << "**********************************************************************************" << endl;
cout << "Average Score Train: " << total_score_train / nFolds << endl;
cout << "Average Score Test : " << total_score / nFolds << endl;
return 0;
}

View File

@ -13,11 +13,11 @@ namespace bayesnet {
, className(className)
, classNumStates(classNumStates)
{
samples = torch::zeros({ static_cast<int64_t>(vsamples[0].size()), static_cast<int64_t>(vsamples.size() + 1) }, torch::kInt64);
samples = torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32);
for (int i = 0; i < vsamples.size(); ++i) {
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64));
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32));
}
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{
@ -43,8 +43,8 @@ namespace bayesnet {
margin[value] = mask.sum().item<float>() / samples.sizes()[0];
}
for (auto [first, second] : combinations) {
int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
int index_first = find(features.begin(), features.end(), first) - features.begin();
int index_second = find(features.begin(), features.end(), second) - features.begin();
double accumulated = 0;
for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value;

View File

@ -25,7 +25,7 @@ namespace bayesnet {
}
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = X;
this->X = torch::transpose(X, 0, 1);
this->y = y;
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
@ -34,12 +34,12 @@ namespace bayesnet {
// Fit from std::vector inputs.
// Builds this->X as a tensor of shape (X[0].size(), X.size()) and writes X[i] into
// column i, stores y as a 1-D tensor, keeps the original vectors in Xv / yv, and
// returns the result of build(features, className, states).
// X[0] is read unconditionally, so X must contain at least one inner vector.
// NOTE(review): each kInt64 statement below is immediately followed by an otherwise
// identical kInt32 statement that overwrites its result. This looks like the
// before/after lines of a diff shown together -- confirm against the real file.
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
Xv = X;
// Copy each input vector X[i] into column i of the tensor.
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
}
this->y = torch::tensor(y, kInt64);
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
}
@ -77,7 +77,7 @@ namespace bayesnet {
Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + m_);
}
auto yp = model.predict(Xd);
auto ypred = torch::tensor(yp, torch::kInt64);
auto ypred = torch::tensor(yp, torch::kInt32);
return ypred;
}
vector<int> Classifier::predict(vector<vector<int>>& X)
@ -121,6 +121,7 @@ namespace bayesnet {
}
void Classifier::addNodes()
{
auto test = model.getEdges();
// Add all nodes to the network
for (auto feature : features) {
model.addNode(feature, states[feature].size());

View File

@ -32,12 +32,12 @@ namespace bayesnet {
}
// Fit the ensemble from std::vector inputs.
// Builds this->X as a tensor of shape (X[0].size(), X.size()) and writes X[i] into
// column i, stores y as a 1-D tensor, keeps the original vectors in Xv / yv, and
// returns the result of build(features, className, states).
// X[0] is read unconditionally, so X must contain at least one inner vector.
// NOTE(review): each kInt64 statement below is immediately followed by an otherwise
// identical kInt32 statement that overwrites its result. This looks like the
// before/after lines of a diff shown together -- confirm against the real file.
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
Xv = X;
// Copy each input vector X[i] into column i of the tensor.
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
}
this->y = torch::tensor(y, kInt64);
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
}
@ -46,7 +46,7 @@ namespace bayesnet {
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64);
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt32);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, models[i]->predict(X));
}
@ -54,7 +54,7 @@ namespace bayesnet {
}
vector<int> Ensemble::voting(Tensor& y_pred)
{
auto y_pred_ = y_pred.accessor<int64_t, 2>();
auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(states[className].size(), 0);
@ -77,9 +77,9 @@ namespace bayesnet {
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
Tensor y_pred = torch::zeros({ m_, n_models }, kInt64);
Tensor y_pred = torch::zeros({ m_, n_models }, kInt32);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64));
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32));
}
return voting(y_pred);
}

View File

@ -26,7 +26,8 @@ namespace bayesnet {
features.push_back(name);
}
if (nodes.find(name) != nodes.end()) {
// if node exists update its number of states
// if node exists update its number of states and remove parents, children and CPT
nodes[name]->clear();
nodes[name]->setNumStates(numStates);
return;
}
@ -88,7 +89,6 @@ namespace bayesnet {
nodes[child]->removeParent(nodes[parent].get());
throw invalid_argument("Adding this edge forms a cycle in the graph.");
}
}
map<string, std::unique_ptr<Node>>& Network::getNodes()
{
@ -96,23 +96,71 @@ namespace bayesnet {
}
// Fit the network from tensor inputs.
// Records the feature names and class name, rebuilds `dataset` (name -> vector<int>)
// and `samples` (X with y appended as one extra column), derives classNumStates from
// the largest label, and finishes with completeFit().
// X is indexed as X.index({ "...", i }) for i in [0, featureNames.size()) and
// X.size(0) elements are read from each slice.
// y is read through data_ptr<int>() for y.size(0) elements -- assumes y is a
// contiguous 32-bit integer tensor; TODO confirm with callers.
// Throws invalid_argument if the verification loop near the end finds a mismatch.
void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& featureNames, const string& className)
{
// NOTE(review): this call hands the same data to the vector overload, and the
// statements that follow then redo the setup and call completeFit() again. It looks
// like the pre-change line of this diff shown next to its replacement -- confirm
// against the real file.
this->fit(tensorToVector(X), vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)), featureNames, className);
features = featureNames;
this->className = className;
dataset.clear();
// Number of class states = largest label value + 1.
classNumStates = torch::max(y).item<int>() + 1;
// Reshape y to (y.size(0), 1) and append it to X along dimension 1.
samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1);
for (int i = 0; i < featureNames.size(); ++i) {
auto column = torch::flatten(X.index({ "...", i }));
auto k = vector<int>();
// The inner `i` shadows the outer feature index; inside this loop `i` is the row.
// Values are extracted one element at a time via item<int>().
for (auto i = 0; i < X.size(0); ++i) {
k.push_back(column[i].item<int>());
}
// Back in the outer scope: `i` is the feature index again.
dataset[featureNames[i]] = k;
}
dataset[className] = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// //
// // Check if data is ok
// cout << "******************************************************************" << endl;
// cout << "Check samples, sizes: " << samples.sizes() << endl;
// for (auto i = 0; i < features.size(); ++i) {
// cout << featureNames[i] << ": " << nodes[featureNames[i]]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", i })).item<int>() + 1 << " dataset" << *max_element(dataset[featureNames[i]].begin(), dataset[featureNames[i]].end()) + 1 << endl;
// }
// cout << className << ": " << nodes[className]->getNumStates() << ": torch:max " << torch::max(samples.index({ "...", -1 })) + 1 << endl;
// cout << "******************************************************************" << endl;
// //
// //
/*
*/
// Verification pass: re-extract every feature column from X exactly as above and
// compare it with what was just stored in `dataset`. Prints one "Checking <feature>"
// line per feature to stdout and throws on the first mismatch.
for (int i = 0; i < features.size(); ++i) {
cout << "Checking " << features[i] << endl;
auto column = torch::flatten(X.index({ "...", i }));
auto k = vector<int>();
// Same shadowing as in the first loop: the inner `i` is the row index.
for (auto i = 0; i < X.size(0); ++i) {
k.push_back(column[i].item<int>());
}
if (k != dataset[features[i]]) {
throw invalid_argument("Dataset and samples do not match");
}
}
/*
*/
completeFit();
}
// Fit the network from std::vector inputs.
// Records the feature names and class name, then fills `dataset` (name -> values)
// and `samples`, a tensor of shape (input_data[0].size(), input_data.size() + 1)
// whose column i holds input_data[i] and whose last column holds the labels.
// classNumStates is the largest label + 1. Ends by calling completeFit().
// input_data[0] is read unconditionally and max_element(labels) is dereferenced, so
// both input_data and labels must be non-empty.
// NOTE(review): each kInt64 statement below is immediately followed by an otherwise
// identical kInt32 statement that overwrites its result. This looks like the
// before/after lines of a diff shown together -- confirm against the real file.
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
{
features = featureNames;
this->className = className;
dataset.clear();
// Build dataset & tensor of samples
samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
samples = torch::zeros({ static_cast<int>(input_data[0].size()), static_cast<int>(input_data.size() + 1) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) {
dataset[featureNames[i]] = input_data[i];
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt32));
}
dataset[className] = labels;
// Index -1 addresses the last column, reserved above for the class labels.
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
classNumStates = *max_element(labels.begin(), labels.end()) + 1;
completeFit();
}
void Network::completeFit()
{
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) {
maxThreadsRunning = 1;
@ -122,15 +170,12 @@ namespace bayesnet {
condition_variable cv;
int activeThreads = 0;
int nextNodeIndex = 0;
while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
if (nextNodeIndex >= nodes.size()) {
break; // No more work remaining
}
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() {
while (true) {
unique_lock<mutex> lock(mtx);
@ -140,7 +185,6 @@ namespace bayesnet {
auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex;
lock.unlock();
pair.second->computeCPT(dataset, laplaceSmoothing);
lock.lock();
nodes[pair.first] = std::move(pair.second);
@ -150,7 +194,6 @@ namespace bayesnet {
--activeThreads;
cv.notify_one();
});
++activeThreads;
}
for (auto& thread : threads) {
@ -218,7 +261,6 @@ namespace bayesnet {
evidence[features[i]] = sample[i];
}
return exactInference(evidence);
}
double Network::computeFactor(map<string, int>& completeEvidence)
{
@ -292,5 +334,4 @@ namespace bayesnet {
}
return edges;
}
}

View File

@ -24,6 +24,7 @@ namespace bayesnet {
double entropy(torch::Tensor&);
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
double mutualInformation(torch::Tensor&, torch::Tensor&);
void completeFit();
public:
Network();
Network(float, int);

View File

@ -6,6 +6,14 @@ namespace bayesnet {
: name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
{
}
void Node::clear()
{
parents.clear();
children.clear();
cpTable = torch::Tensor();
dimensions.clear();
numStates = 0;
}
string Node::getName() const
{
return name;

View File

@ -17,6 +17,7 @@ namespace bayesnet {
public:
vector<pair<string, string>> combinations(const vector<string>&);
Node(const std::string&, int);
void clear();
void addParent(Node*);
void addChild(Node*);
void removeParent(Node*);

View File

@ -3,7 +3,7 @@
namespace bayesnet {
using namespace torch;
TAN::TAN() : Classifier(Network()) {}
TAN::TAN() : Classifier(Network(0.1)) {}
void TAN::train()
{

View File

@ -18,7 +18,7 @@ namespace bayesnet {
// Iterate over cols
for (int i = 0; i < tensor.size(1); ++i) {
auto col_tensor = tensor.index({ "...", i });
auto col = vector<int>(col_tensor.data_ptr<int64_t>(), col_tensor.data_ptr<int64_t>() + tensor.size(0));
auto col = vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
result.push_back(col);
}
return result;

View File

@ -6,3 +6,4 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
add_executable(main Experiment.cc Folding.cc platformUtils.cc)
add_executable(testx testx.cpp Folding.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(testx ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@ -18,8 +18,13 @@
using namespace std;
Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
Result cross_validation(Fold* fold, string model_name, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
{
auto classifiers = map<string, bayesnet::BaseClassifier*>({
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
}
);
auto result = Result();
auto k = fold->getNumberOfFolds();
auto accuracy = torch::zeros({ k }, kFloat64);
@ -27,6 +32,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X,
auto test_time = torch::zeros({ k }, kFloat64);
Timer train_timer, test_timer;
for (int i = 0; i < k; i++) {
bayesnet::BaseClassifier* model = classifiers[model_name];
train_timer.start();
auto [train, test] = fold->getFold(i);
auto train_t = torch::tensor(train);
@ -43,8 +49,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X,
cout << "y_test: " << y_test.sizes() << endl;
train_time[i] = train_timer.getDuration();
test_timer.start();
//auto acc = model->score(X_test, y_test);
auto acc = 7;
auto acc = model->score(X_test, y_test);
test_time[i] = test_timer.getDuration();
accuracy[i] = acc;
}
@ -140,18 +145,16 @@ int main(int argc, char** argv)
fold = new StratifiedKFold(n_folds, y, -1);
else
fold = new KFold(n_folds, y.numel(), -1);
auto classifiers = map<string, bayesnet::BaseClassifier*>({
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
}
);
auto experiment = Experiment();
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
experiment.setStratified(stratified).setNFolds(5).addRandomSeed(271).setScoreName("accuracy");
bayesnet::BaseClassifier* model = classifiers[model_name];
auto result = cross_validation(fold, model, X, y, features, className, states);
auto result = cross_validation(fold, model_name, X, y, features, className, states);
result.setDataset(file_name);
experiment.addResult(result);
experiment.save(path);
for (auto& item : states) {
cout << item.first << ": " << item.second.size() << endl;
}
return 0;
}

View File

@ -1,13 +1,16 @@
#include "Folding.h"
#include <algorithm>
#include <map>
#include <random>
// Base-class constructor: stores k, n and seed, then seeds both random sources used
// by the folding code -- the `random_seed` engine member and the C library generator
// behind rand().
// A seed of -1 means "not reproducible": the engine is seeded from random_device and
// srand() from the current time. Any other value seeds both with that value.
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
    random_device entropy;
    const bool nonDeterministic = (seed == -1);
    if (nonDeterministic) {
        random_seed = default_random_engine(entropy());
        srand(time(0));
    } else {
        // Same int -> unsigned conversion the conditional expression performed.
        random_seed = default_random_engine(static_cast<unsigned int>(seed));
        srand(seed);
    }
}
// Build the sample ordering for plain k-fold: indices 0..n-1 in shuffled order.
// k, n and seed are forwarded to the Fold base constructor.
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed)
{
indices = vector<int>(n);
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
// NOTE(review): the local engine declared on the next two lines has the same name as
// the `random_seed` member declared in Fold, so as written the shuffle below uses
// this local engine rather than the member. These two lines look like pre-change
// lines of this diff shown next to the new code -- confirm against the real file.
random_device rd;
default_random_engine random_seed(seed == -1 ? rd() : seed);
shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
@ -54,8 +57,6 @@ void StratifiedKFold::build()
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
random_device rd;
default_random_engine random_seed(seed == -1 ? rd() : seed);
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
@ -71,7 +72,7 @@ void StratifiedKFold::build()
class_indices[label].erase(class_indices[label].begin(), it);
}
while (remainder_samples_to_take > 0) {
int fold = (arc4random() % static_cast<int>(k));
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size) {
continue;
}

View File

@ -2,6 +2,7 @@
#define FOLDING_H
#include <torch/torch.h>
#include <vector>
#include <random>
using namespace std;
class Fold {
@ -9,8 +10,9 @@ protected:
int k;
int n;
int seed;
default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1) : k(k), n(n), seed(seed) {}
Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }

View File

@ -55,16 +55,16 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt64);
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64));
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
} else {
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
}

View File

@ -62,6 +62,32 @@ int main()
cout << endl;
cout << "Test Statistics: " << counts(y, test);
cout << "==============================================================================" << endl;
torch::Tensor a = torch::zeros({ 5, 3 });
torch::Tensor b = torch::zeros({ 5 }) + 1;
torch::Tensor c = torch::cat({ a, b.view({5, 1}) }, 1);
cout << "a:" << a.sizes() << endl;
cout << a << endl;
cout << "b:" << b.sizes() << endl;
cout << b << endl;
cout << "c:" << c.sizes() << endl;
cout << c << endl;
torch::Tensor d = torch::zeros({ 5, 3 });
torch::Tensor e = torch::tensor({ 1,2,3,4,5 }) + 1;
torch::Tensor f = torch::cat({ d, e.view({5, 1}) }, 1);
cout << "d:" << d.sizes() << endl;
cout << d << endl;
cout << "e:" << e.sizes() << endl;
cout << e << endl;
cout << "f:" << f.sizes() << endl;
cout << f << endl;
auto indices = torch::tensor({ 0, 2, 4 });
auto k = f.index({ indices, "..." });
cout << "k:" << k.sizes() << endl;
cout << k << endl;
auto w = torch::index_select(f, 0, indices);
cout << "w:" << w.sizes() << endl;
cout << w << endl;
// cout << "Vector poly" << endl;
// auto some = vector<A>();
// auto cx = C(5, 4);