Add tests to reach 90% coverage

This commit is contained in:
2024-04-08 00:13:59 +02:00
parent 46cb8d30eb
commit 0d6a081d01
13 changed files with 424 additions and 56 deletions

View File

@@ -3,19 +3,6 @@
namespace bayesnet {
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
{
validHyperparameters = { "predict_voting" };
}
void AODELd::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("predict_voting")) {
predict_voting = hyperparameters["predict_voting"];
hyperparameters.erase("predict_voting");
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
}
}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{

View File

@@ -10,7 +10,6 @@ namespace bayesnet {
AODELd(bool predict_voting = true);
virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
protected:
void trainModel(const torch::Tensor& weights) override;

View File

@@ -1,27 +1,35 @@
#include <thread>
#include <mutex>
#include <sstream>
#include "Network.h"
#include "bayesnet/utils/bayesnetUtils.h"
namespace bayesnet {
Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted)
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
{
}
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
{
}
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
{
if (samples.defined())
samples = samples.clone();
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
}
void Network::initialize()
{
features = std::vector<std::string>();
features.clear();
className = "";
classNumStates = 0;
fitted = false;
nodes.clear();
samples = torch::Tensor();
}
float Network::getmaxThreads()
float Network::getMaxThreads() const
{
return maxThreads;
}
@@ -114,11 +122,14 @@ namespace bayesnet {
if (n_features != featureNames.size()) {
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
}
if (features.size() == 0) {
throw std::invalid_argument("The network has not been initialized. You must call addNode() before calling fit()");
}
if (n_features != features.size() - 1) {
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw std::invalid_argument("className not found in Network::features");
throw std::invalid_argument("Class Name not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
@@ -404,11 +415,13 @@ namespace bayesnet {
}
return result;
}
void Network::dump_cpt() const
std::string Network::dump_cpt() const
{
std::stringstream oss;
for (auto& node : nodes) {
std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
std::cout << node.second->getCPT() << std::endl;
oss << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
oss << node.second->getCPT() << std::endl;
}
return oss.str();
}
}

View File

@@ -10,10 +10,10 @@ namespace bayesnet {
public:
Network();
explicit Network(float);
explicit Network(Network&);
explicit Network(const Network&);
~Network() = default;
torch::Tensor& getSamples();
float getmaxThreads();
float getMaxThreads() const;
void addNode(const std::string&);
void addEdge(const std::string&, const std::string&);
std::map<std::string, std::unique_ptr<Node>>& getNodes();
@@ -39,7 +39,7 @@ namespace bayesnet {
std::vector<std::string> show() const;
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
void initialize();
void dump_cpt() const;
std::string dump_cpt() const;
inline std::string version() { return { project_version.begin(), project_version.end() }; }
private:
std::map<std::string, std::unique_ptr<Node>> nodes;
@@ -49,7 +49,7 @@ namespace bayesnet {
std::vector<std::string> features; // Including classname
std::string className;
double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model
torch::Tensor samples; // n+1xm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);

View File

@@ -9,12 +9,12 @@ namespace bayesnet {
, classNumStates(classNumStates)
{
}
//samples is nxm std::vector used to fit the model
//samples is n+1xm std::vector used to fit the model
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: features(features)
, className(className)
, classNumStates(classNumStates)
, samples(torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32))
, samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
{
for (int i = 0; i < vsamples.size(); ++i) {
samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));

View File

@@ -5,11 +5,16 @@
#include <torch/torch.h>
namespace bayesnet {
class Metrics {
private:
int classNumStates = 0;
std::vector<double> scoresKBest;
std::vector<int> featuresKBest; // sorted indices of the features
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
public:
Metrics() = default;
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<double> getScoresKBest() const;
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
torch::Tensor conditionalEdge(const torch::Tensor& weights);
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
protected:
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
std::string className;
@@ -34,16 +39,11 @@ namespace bayesnet {
v.erase(v.begin());
return temp;
}
public:
Metrics() = default;
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<double> getScoresKBest() const;
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
torch::Tensor conditionalEdge(const torch::Tensor& weights);
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
private:
int classNumStates = 0;
std::vector<double> scoresKBest;
std::vector<int> featuresKBest; // sorted indices of the features
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
};
}
#endif