Remove old Files library
This commit is contained in:
@@ -88,7 +88,6 @@ message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
|
|||||||
## Configure test data path
|
## Configure test data path
|
||||||
cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
|
cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
|
||||||
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
|
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
|
||||||
add_subdirectory(lib/Files)
|
|
||||||
add_subdirectory(config)
|
add_subdirectory(config)
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
add_subdirectory(sample)
|
add_subdirectory(sample)
|
||||||
|
@@ -1,176 +0,0 @@
|
|||||||
#include "ArffFiles.h"
|
|
||||||
#include <fstream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <map>
|
|
||||||
#include <cctype> // std::isdigit
|
|
||||||
#include <algorithm> // std::all_of
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
// Default constructor: every member is default-initialised (empty containers and strings).
ArffFiles::ArffFiles() = default;
|
|
||||||
|
|
||||||
std::vector<std::string> ArffFiles::getLines() const
|
|
||||||
{
|
|
||||||
return lines;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned long int ArffFiles::getSize() const
|
|
||||||
{
|
|
||||||
return lines.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
|
|
||||||
{
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ArffFiles::getClassName() const
|
|
||||||
{
|
|
||||||
return className;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ArffFiles::getClassType() const
|
|
||||||
{
|
|
||||||
return classType;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<float>>& ArffFiles::getX()
|
|
||||||
{
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int>& ArffFiles::getY()
|
|
||||||
{
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ArffFiles::loadCommon(std::string fileName)
|
|
||||||
{
|
|
||||||
std::ifstream file(fileName);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
throw std::invalid_argument("Unable to open file");
|
|
||||||
}
|
|
||||||
std::string line;
|
|
||||||
std::string keyword;
|
|
||||||
std::string attribute;
|
|
||||||
std::string type;
|
|
||||||
std::string type_w;
|
|
||||||
while (getline(file, line)) {
|
|
||||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
|
||||||
std::stringstream ss(line);
|
|
||||||
ss >> keyword >> attribute;
|
|
||||||
type = "";
|
|
||||||
while (ss >> type_w)
|
|
||||||
type += type_w + " ";
|
|
||||||
attributes.emplace_back(trim(attribute), trim(type));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line[0] == '@') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
lines.push_back(line);
|
|
||||||
}
|
|
||||||
file.close();
|
|
||||||
if (attributes.empty())
|
|
||||||
throw std::invalid_argument("No attributes found");
|
|
||||||
}
|
|
||||||
|
|
||||||
void ArffFiles::load(const std::string& fileName, bool classLast)
|
|
||||||
{
|
|
||||||
int labelIndex;
|
|
||||||
loadCommon(fileName);
|
|
||||||
if (classLast) {
|
|
||||||
className = std::get<0>(attributes.back());
|
|
||||||
classType = std::get<1>(attributes.back());
|
|
||||||
attributes.pop_back();
|
|
||||||
labelIndex = static_cast<int>(attributes.size());
|
|
||||||
} else {
|
|
||||||
className = std::get<0>(attributes.front());
|
|
||||||
classType = std::get<1>(attributes.front());
|
|
||||||
attributes.erase(attributes.begin());
|
|
||||||
labelIndex = 0;
|
|
||||||
}
|
|
||||||
generateDataset(labelIndex);
|
|
||||||
}
|
|
||||||
void ArffFiles::load(const std::string& fileName, const std::string& name)
|
|
||||||
{
|
|
||||||
int labelIndex;
|
|
||||||
loadCommon(fileName);
|
|
||||||
bool found = false;
|
|
||||||
for (int i = 0; i < attributes.size(); ++i) {
|
|
||||||
if (attributes[i].first == name) {
|
|
||||||
className = std::get<0>(attributes[i]);
|
|
||||||
classType = std::get<1>(attributes[i]);
|
|
||||||
attributes.erase(attributes.begin() + i);
|
|
||||||
labelIndex = i;
|
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!found) {
|
|
||||||
throw std::invalid_argument("Class name not found");
|
|
||||||
}
|
|
||||||
generateDataset(labelIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ArffFiles::generateDataset(int labelIndex)
|
|
||||||
{
|
|
||||||
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
|
|
||||||
auto yy = std::vector<std::string>(lines.size(), "");
|
|
||||||
auto removeLines = std::vector<int>(); // Lines with missing values
|
|
||||||
for (size_t i = 0; i < lines.size(); i++) {
|
|
||||||
std::stringstream ss(lines[i]);
|
|
||||||
std::string value;
|
|
||||||
int pos = 0;
|
|
||||||
int xIndex = 0;
|
|
||||||
while (getline(ss, value, ',')) {
|
|
||||||
if (pos++ == labelIndex) {
|
|
||||||
yy[i] = value;
|
|
||||||
} else {
|
|
||||||
if (value == "?") {
|
|
||||||
X[xIndex++][i] = -1;
|
|
||||||
removeLines.push_back(i);
|
|
||||||
} else
|
|
||||||
X[xIndex++][i] = stof(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (auto i : removeLines) {
|
|
||||||
yy.erase(yy.begin() + i);
|
|
||||||
for (auto& x : X) {
|
|
||||||
x.erase(x.begin() + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
y = factorize(yy);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `source` without leading and trailing spaces, single quotes,
/// newlines, carriage returns and tabs. A string made only of those
/// characters yields an empty string.
std::string ArffFiles::trim(const std::string& source)
{
    const char* const strip = " '\n\r\t";
    const std::string::size_type first = source.find_first_not_of(strip);
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = source.find_last_not_of(strip);
    return source.substr(first, last - first + 1);
}
|
|
||||||
|
|
||||||
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
|
|
||||||
{
|
|
||||||
std::vector<int> yy;
|
|
||||||
labels.clear();
|
|
||||||
yy.reserve(labels_t.size());
|
|
||||||
std::map<std::string, int> labelMap;
|
|
||||||
int i = 0;
|
|
||||||
for (const std::string& label : labels_t) {
|
|
||||||
if (labelMap.find(label) == labelMap.end()) {
|
|
||||||
labelMap[label] = i++;
|
|
||||||
bool allDigits = std::all_of(label.begin(), label.end(), isdigit);
|
|
||||||
if (allDigits)
|
|
||||||
labels.push_back("Class " + label);
|
|
||||||
else
|
|
||||||
labels.push_back(label);
|
|
||||||
}
|
|
||||||
yy.push_back(labelMap[label]);
|
|
||||||
}
|
|
||||||
return yy;
|
|
||||||
}
|
|
@@ -1,34 +0,0 @@
|
|||||||
#ifndef ARFFFILES_H
|
|
||||||
#define ARFFFILES_H
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
/// Reader for ARFF data files.
///
/// A call to load() parses the file, picks one attribute as the class and
/// exposes the remaining attributes as a float feature matrix (getX) together
/// with integer-encoded class values (getY).
class ArffFiles {
public:
    ArffFiles();

    // --- Loading -----------------------------------------------------------
    /// Loads `fileName`; the class is the last attribute when `classLast` is
    /// true (the default), the first one otherwise.
    void load(const std::string& fileName, bool classLast = true);
    /// Loads `fileName`; the class is the attribute called `className`.
    void load(const std::string& fileName, const std::string& className);

    // --- Dataset -----------------------------------------------------------
    /// Feature matrix: one inner vector per attribute.
    std::vector<std::vector<float>>& getX();
    /// Integer class codes, one per sample.
    std::vector<int>& getY();
    /// Display names recorded by the last factorize() call.
    std::vector<std::string> getLabels() const { return labels; }

    // --- Metadata ----------------------------------------------------------
    /// (name, type) pairs of the attributes.
    std::vector<std::pair<std::string, std::string>> getAttributes() const;
    std::string getClassName() const;
    std::string getClassType() const;
    /// Raw data lines as read from the file.
    std::vector<std::string> getLines() const;
    /// Number of raw data lines.
    unsigned long int getSize() const;

    // --- Helpers -----------------------------------------------------------
    /// Strips surrounding whitespace and single quotes.
    static std::string trim(const std::string& source);
    /// Maps string labels to integer codes and rebuilds the label names.
    std::vector<int> factorize(const std::vector<std::string>& labels_t);

private:
    std::vector<std::string> lines;                               // raw data rows
    std::vector<std::pair<std::string, std::string>> attributes;  // (name, type)
    std::string className;
    std::string classType;
    std::vector<std::vector<float>> X;  // features, indexed [attribute][sample]
    std::vector<int> y;                 // encoded class values
    std::vector<std::string> labels;    // display name of each class code

    /// Fills X and y from `lines`; `labelIndex` is the column of the class value.
    void generateDataset(int labelIndex);
    /// Parses the file into `attributes` and `lines`.
    void loadCommon(std::string fileName);
};
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1 +0,0 @@
|
|||||||
add_library(ArffFiles ArffFiles.cc)
|
|
@@ -12,4 +12,4 @@ include_directories(
|
|||||||
${Bayesnet_INCLUDE_DIRS}
|
${Bayesnet_INCLUDE_DIRS}
|
||||||
)
|
)
|
||||||
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
|
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
|
||||||
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
@@ -5,7 +5,7 @@
|
|||||||
#include <torch/torch.h>
|
#include <torch/torch.h>
|
||||||
#include <argparse/argparse.hpp>
|
#include <argparse/argparse.hpp>
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
#include <ArffFiles.h>
|
#include <ArffFiles.hpp>
|
||||||
#include <CPPFImdlp.h>
|
#include <CPPFImdlp.h>
|
||||||
#include <folding.hpp>
|
#include <folding.hpp>
|
||||||
#include <bayesnet/utils/BayesMetrics.h>
|
#include <bayesnet/utils/BayesMetrics.h>
|
||||||
@@ -79,11 +79,11 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
|
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
program.add_argument("-p", "--path")
|
program.add_argument("-p", "--path")
|
||||||
.help(" folder where the data files are located, default")
|
.help(" folder where the data files are located, default")
|
||||||
.default_value(std::string{ PATH }
|
.default_value(std::string{ PATH }
|
||||||
);
|
);
|
||||||
program.add_argument("-m", "--model")
|
program.add_argument("-m", "--model")
|
||||||
.help("Model to use " + platform::Models::instance()->toString())
|
.help("Model to use " + platform::Models::instance()->toString())
|
||||||
.action([](const std::string& value) {
|
.action([](const std::string& value) {
|
||||||
@@ -93,7 +93,7 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
|
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
|
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
|
||||||
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
|
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
|
||||||
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
|
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
|
||||||
@@ -112,129 +112,129 @@ int main(int argc, char** argv)
|
|||||||
catch (...) {
|
catch (...) {
|
||||||
throw runtime_error("Number of folds must be an integer");
|
throw runtime_error("Number of folds must be an integer");
|
||||||
}});
|
}});
|
||||||
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
|
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
|
||||||
bool class_last, stratified, tensors, dump_cpt;
|
bool class_last, stratified, tensors, dump_cpt;
|
||||||
std::string model_name, file_name, path, complete_file_name;
|
std::string model_name, file_name, path, complete_file_name;
|
||||||
int nFolds, seed;
|
int nFolds, seed;
|
||||||
try {
|
try {
|
||||||
program.parse_args(argc, argv);
|
program.parse_args(argc, argv);
|
||||||
file_name = program.get<std::string>("dataset");
|
file_name = program.get<std::string>("dataset");
|
||||||
path = program.get<std::string>("path");
|
path = program.get<std::string>("path");
|
||||||
model_name = program.get<std::string>("model");
|
model_name = program.get<std::string>("model");
|
||||||
complete_file_name = path + file_name + ".arff";
|
complete_file_name = path + file_name + ".arff";
|
||||||
stratified = program.get<bool>("stratified");
|
stratified = program.get<bool>("stratified");
|
||||||
tensors = program.get<bool>("tensors");
|
tensors = program.get<bool>("tensors");
|
||||||
nFolds = program.get<int>("folds");
|
nFolds = program.get<int>("folds");
|
||||||
seed = program.get<int>("seed");
|
seed = program.get<int>("seed");
|
||||||
dump_cpt = program.get<bool>("dumpcpt");
|
dump_cpt = program.get<bool>("dumpcpt");
|
||||||
class_last = datasets[file_name];
|
class_last = datasets[file_name];
|
||||||
if (!file_exists(complete_file_name)) {
|
if (!file_exists(complete_file_name)) {
|
||||||
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const exception& err) {
|
||||||
|
cerr << err.what() << std::endl;
|
||||||
|
cerr << program;
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
catch (const exception& err) {
|
|
||||||
cerr << err.what() << std::endl;
|
|
||||||
cerr << program;
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Begin Processing
|
* Begin Processing
|
||||||
*/
|
*/
|
||||||
auto handler = ArffFiles();
|
auto handler = ArffFiles();
|
||||||
handler.load(complete_file_name, class_last);
|
handler.load(complete_file_name, class_last);
|
||||||
// Get Dataset X, y
|
// Get Dataset X, y
|
||||||
std::vector<mdlp::samples_t>& X = handler.getX();
|
std::vector<mdlp::samples_t>& X = handler.getX();
|
||||||
mdlp::labels_t& y = handler.getY();
|
mdlp::labels_t& y = handler.getY();
|
||||||
// Get className & Features
|
// Get className & Features
|
||||||
auto className = handler.getClassName();
|
auto className = handler.getClassName();
|
||||||
std::vector<std::string> features;
|
std::vector<std::string> features;
|
||||||
auto attributes = handler.getAttributes();
|
auto attributes = handler.getAttributes();
|
||||||
transform(attributes.begin(), attributes.end(), back_inserter(features),
|
transform(attributes.begin(), attributes.end(), back_inserter(features),
|
||||||
[](const pair<std::string, std::string>& item) { return item.first; });
|
[](const pair<std::string, std::string>& item) { return item.first; });
|
||||||
// Discretize Dataset
|
// Discretize Dataset
|
||||||
auto [Xd, maxes] = discretize(X, y, features);
|
auto [Xd, maxes] = discretize(X, y, features);
|
||||||
maxes[className] = *max_element(y.begin(), y.end()) + 1;
|
maxes[className] = *max_element(y.begin(), y.end()) + 1;
|
||||||
map<std::string, std::vector<int>> states;
|
map<std::string, std::vector<int>> states;
|
||||||
for (auto feature : features) {
|
for (auto feature : features) {
|
||||||
states[feature] = std::vector<int>(maxes[feature]);
|
states[feature] = std::vector<int>(maxes[feature]);
|
||||||
}
|
|
||||||
states[className] = std::vector<int>(maxes[className]);
|
|
||||||
auto clf = platform::Models::instance()->create(model_name);
|
|
||||||
clf->fit(Xd, y, features, className, states);
|
|
||||||
if (dump_cpt) {
|
|
||||||
std::cout << "--- CPT Tables ---" << std::endl;
|
|
||||||
clf->dump_cpt();
|
|
||||||
}
|
|
||||||
auto lines = clf->show();
|
|
||||||
for (auto line : lines) {
|
|
||||||
std::cout << line << std::endl;
|
|
||||||
}
|
|
||||||
std::cout << "--- Topological Order ---" << std::endl;
|
|
||||||
auto order = clf->topological_order();
|
|
||||||
for (auto name : order) {
|
|
||||||
std::cout << name << ", ";
|
|
||||||
}
|
|
||||||
std::cout << "end." << std::endl;
|
|
||||||
auto score = clf->score(Xd, y);
|
|
||||||
std::cout << "Score: " << score << std::endl;
|
|
||||||
auto graph = clf->graph();
|
|
||||||
auto dot_file = model_name + "_" + file_name;
|
|
||||||
ofstream file(dot_file + ".dot");
|
|
||||||
file << graph;
|
|
||||||
file.close();
|
|
||||||
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
|
|
||||||
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
|
|
||||||
std::string stratified_string = stratified ? " Stratified" : "";
|
|
||||||
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
|
|
||||||
std::cout << "==========================================" << std::endl;
|
|
||||||
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
|
|
||||||
torch::Tensor yt = torch::tensor(y, torch::kInt32);
|
|
||||||
for (int i = 0; i < features.size(); ++i) {
|
|
||||||
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
|
||||||
}
|
|
||||||
float total_score = 0, total_score_train = 0, score_train, score_test;
|
|
||||||
folding::Fold* fold;
|
|
||||||
double nodes = 0.0;
|
|
||||||
if (stratified)
|
|
||||||
fold = new folding::StratifiedKFold(nFolds, y, seed);
|
|
||||||
else
|
|
||||||
fold = new folding::KFold(nFolds, y.size(), seed);
|
|
||||||
for (auto i = 0; i < nFolds; ++i) {
|
|
||||||
auto [train, test] = fold->getFold(i);
|
|
||||||
std::cout << "Fold: " << i + 1 << std::endl;
|
|
||||||
if (tensors) {
|
|
||||||
auto ttrain = torch::tensor(train, torch::kInt64);
|
|
||||||
auto ttest = torch::tensor(test, torch::kInt64);
|
|
||||||
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
|
|
||||||
torch::Tensor ytraint = yt.index({ ttrain });
|
|
||||||
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
|
||||||
torch::Tensor ytestt = yt.index({ ttest });
|
|
||||||
clf->fit(Xtraint, ytraint, features, className, states);
|
|
||||||
auto temp = clf->predict(Xtraint);
|
|
||||||
score_train = clf->score(Xtraint, ytraint);
|
|
||||||
score_test = clf->score(Xtestt, ytestt);
|
|
||||||
} else {
|
|
||||||
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
|
||||||
auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
|
||||||
clf->fit(Xtrain, ytrain, features, className, states);
|
|
||||||
std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
|
|
||||||
nodes += clf->getNumberOfNodes();
|
|
||||||
score_train = clf->score(Xtrain, ytrain);
|
|
||||||
score_test = clf->score(Xtest, ytest);
|
|
||||||
}
|
}
|
||||||
|
states[className] = std::vector<int>(maxes[className]);
|
||||||
|
auto clf = platform::Models::instance()->create(model_name);
|
||||||
|
clf->fit(Xd, y, features, className, states);
|
||||||
if (dump_cpt) {
|
if (dump_cpt) {
|
||||||
std::cout << "--- CPT Tables ---" << std::endl;
|
std::cout << "--- CPT Tables ---" << std::endl;
|
||||||
clf->dump_cpt();
|
clf->dump_cpt();
|
||||||
}
|
}
|
||||||
total_score_train += score_train;
|
auto lines = clf->show();
|
||||||
total_score += score_test;
|
for (auto line : lines) {
|
||||||
std::cout << "Score Train: " << score_train << std::endl;
|
std::cout << line << std::endl;
|
||||||
std::cout << "Score Test : " << score_test << std::endl;
|
}
|
||||||
std::cout << "-------------------------------------------------------------------------------" << std::endl;
|
std::cout << "--- Topological Order ---" << std::endl;
|
||||||
}
|
auto order = clf->topological_order();
|
||||||
std::cout << "Nodes: " << nodes / nFolds << std::endl;
|
for (auto name : order) {
|
||||||
std::cout << "**********************************************************************************" << std::endl;
|
std::cout << name << ", ";
|
||||||
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
|
}
|
||||||
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
|
std::cout << "end." << std::endl;
|
||||||
|
auto score = clf->score(Xd, y);
|
||||||
|
std::cout << "Score: " << score << std::endl;
|
||||||
|
auto graph = clf->graph();
|
||||||
|
auto dot_file = model_name + "_" + file_name;
|
||||||
|
ofstream file(dot_file + ".dot");
|
||||||
|
file << graph;
|
||||||
|
file.close();
|
||||||
|
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
|
||||||
|
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
|
||||||
|
std::string stratified_string = stratified ? " Stratified" : "";
|
||||||
|
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
|
||||||
|
std::cout << "==========================================" << std::endl;
|
||||||
|
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
|
||||||
|
torch::Tensor yt = torch::tensor(y, torch::kInt32);
|
||||||
|
for (int i = 0; i < features.size(); ++i) {
|
||||||
|
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
||||||
|
}
|
||||||
|
float total_score = 0, total_score_train = 0, score_train, score_test;
|
||||||
|
folding::Fold* fold;
|
||||||
|
double nodes = 0.0;
|
||||||
|
if (stratified)
|
||||||
|
fold = new folding::StratifiedKFold(nFolds, y, seed);
|
||||||
|
else
|
||||||
|
fold = new folding::KFold(nFolds, y.size(), seed);
|
||||||
|
for (auto i = 0; i < nFolds; ++i) {
|
||||||
|
auto [train, test] = fold->getFold(i);
|
||||||
|
std::cout << "Fold: " << i + 1 << std::endl;
|
||||||
|
if (tensors) {
|
||||||
|
auto ttrain = torch::tensor(train, torch::kInt64);
|
||||||
|
auto ttest = torch::tensor(test, torch::kInt64);
|
||||||
|
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
|
||||||
|
torch::Tensor ytraint = yt.index({ ttrain });
|
||||||
|
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
||||||
|
torch::Tensor ytestt = yt.index({ ttest });
|
||||||
|
clf->fit(Xtraint, ytraint, features, className, states);
|
||||||
|
auto temp = clf->predict(Xtraint);
|
||||||
|
score_train = clf->score(Xtraint, ytraint);
|
||||||
|
score_test = clf->score(Xtestt, ytestt);
|
||||||
|
} else {
|
||||||
|
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
||||||
|
auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
||||||
|
clf->fit(Xtrain, ytrain, features, className, states);
|
||||||
|
std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
|
||||||
|
nodes += clf->getNumberOfNodes();
|
||||||
|
score_train = clf->score(Xtrain, ytrain);
|
||||||
|
score_test = clf->score(Xtest, ytest);
|
||||||
|
}
|
||||||
|
if (dump_cpt) {
|
||||||
|
std::cout << "--- CPT Tables ---" << std::endl;
|
||||||
|
clf->dump_cpt();
|
||||||
|
}
|
||||||
|
total_score_train += score_train;
|
||||||
|
total_score += score_test;
|
||||||
|
std::cout << "Score Train: " << score_train << std::endl;
|
||||||
|
std::cout << "Score Test : " << score_test << std::endl;
|
||||||
|
std::cout << "-------------------------------------------------------------------------------" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "Nodes: " << nodes / nFolds << std::endl;
|
||||||
|
std::cout << "**********************************************************************************" << std::endl;
|
||||||
|
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
|
||||||
|
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
|
||||||
}
|
}
|
@@ -26,7 +26,7 @@ add_executable(
|
|||||||
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
|
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
|
||||||
results/Result.cpp
|
results/Result.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||||
|
|
||||||
# b_grid
|
# b_grid
|
||||||
set(grid_sources GridSearch.cpp GridData.cpp)
|
set(grid_sources GridSearch.cpp GridData.cpp)
|
||||||
@@ -35,7 +35,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
|
|||||||
common/Datasets.cpp common/Dataset.cpp
|
common/Datasets.cpp common/Dataset.cpp
|
||||||
main/HyperParameters.cpp main/Models.cpp
|
main/HyperParameters.cpp main/Models.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||||
|
|
||||||
# b_list
|
# b_list
|
||||||
add_executable(b_list commands/b_list.cpp
|
add_executable(b_list commands/b_list.cpp
|
||||||
@@ -44,7 +44,7 @@ add_executable(b_list commands/b_list.cpp
|
|||||||
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
|
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
|
||||||
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||||
|
|
||||||
# b_main
|
# b_main
|
||||||
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
|
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
|
||||||
@@ -54,7 +54,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
|
|||||||
reports/ReportConsole.cpp reports/ReportBase.cpp
|
reports/ReportConsole.cpp reports/ReportBase.cpp
|
||||||
results/Result.cpp
|
results/Result.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||||
|
|
||||||
# b_manage
|
# b_manage
|
||||||
set(manage_sources ManageScreen.cpp CommandParser.cpp ResultsManager.cpp)
|
set(manage_sources ManageScreen.cpp CommandParser.cpp ResultsManager.cpp)
|
||||||
@@ -66,4 +66,4 @@ add_executable(
|
|||||||
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
||||||
main/Scores.cpp
|
main/Scores.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp "${BayesNet}")
|
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" mdlp "${BayesNet}")
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
#include <ArffFiles.h>
|
#include <ArffFiles.hpp>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include "Dataset.h"
|
#include "Dataset.h"
|
||||||
namespace platform {
|
namespace platform {
|
||||||
|
Reference in New Issue
Block a user