Mirror of https://github.com/Doctorado-ML/bayesclass.git (synced 2025-08-17 00:26:10 +00:00)

Commit: Adding Metrics
MANIFEST.in

@@ -1,4 +1,5 @@
 include README.md LICENSE
 include bayesclass/FeatureSelect.h
 include bayesclass/Node.h
 include bayesclass/Network.h
+include bayesclass/Metrics.hpp
File diff suppressed because it is too large.
bayesclass/BayesNet.pyx

@@ -3,7 +3,6 @@
 from libcpp.vector cimport vector
 from libcpp.string cimport string
 
-
 cdef extern from "Network.h" namespace "bayesnet":
     cdef cppclass Network:
         Network(float, float) except +
@@ -54,3 +53,25 @@ cdef class BayesNetwork:
         return self.thisptr.getClassNumStates()
     def __reduce__(self):
         return (BayesNetwork, ())
+
+cdef extern from "Metrics.hpp" namespace "bayesnet":
+    cdef cppclass Metrics:
+        Metrics(vector[vector[int]], vector[int], vector[string]&, string&, int) except +
+        vector[float] conditionalEdgeWeights()
+        vector[float] test()
+
+cdef class CMetrics:
+    cdef Metrics *thisptr
+    def __cinit__(self, X, y, features, className, classStates):
+        X_ = [X[:, i] for i in range(X.shape[1])]
+        features_bytes = [x.encode() for x in features]
+        self.thisptr = new Metrics(X_, y, features_bytes, className.encode(), classStates)
+    def __dealloc__(self):
+        del self.thisptr
+    def conditionalEdgeWeights(self):
+        return self.thisptr.conditionalEdgeWeights()
+    def test(self):
+        return self.thisptr.test()
+    def __reduce__(self):
+        return (CMetrics, ())
+
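The new `CMetrics` extension type is the bridge the Python layer uses to reach the C++ metrics. A minimal usage sketch (the data values and feature names here are made up; the constructor signature `(X, y, features, className, classStates)` is the one declared above):

```python
import numpy as np
from bayesclass.BayesNet import CMetrics

# Toy discretized dataset: 4 samples, 3 integer features, binary class
X = np.array([[0, 1, 0], [1, 0, 1], [0, 0, 1], [1, 1, 0]])
y = np.array([0, 1, 0, 1])
features = ["f1", "f2", "f3"]

metrics = CMetrics(X, y, features, "class", 2)
# Flattened (n_features + 1) x (n_features + 1) symmetric weight matrix
weights = metrics.conditionalEdgeWeights()
n_var = len(features) + 1
print(np.reshape(weights, (n_var, n_var)))
```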
bayesclass/Metrics.cc (new file, 114 lines)

@@ -0,0 +1,114 @@
+#include "Metrics.hpp"
+using namespace std;
+namespace bayesnet {
+    Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
+        : samples(samples)
+        , features(features)
+        , className(className)
+        , classNumStates(classNumStates)
+    {
+    }
+    Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
+        : features(features)
+        , className(className)
+        , classNumStates(classNumStates)
+    {
+        samples = torch::zeros({ static_cast<int64_t>(vsamples[0].size()), static_cast<int64_t>(vsamples.size() + 1) }, torch::kInt64);
+        for (int i = 0; i < vsamples.size(); ++i) {
+            samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64));
+        }
+        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
+    }
+    vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
+    {
+        vector<pair<string, string>> result;
+        for (int i = 0; i < source.size(); ++i) {
+            string temp = source[i];
+            for (int j = i + 1; j < source.size(); ++j) {
+                result.push_back({ temp, source[j] });
+            }
+        }
+        return result;
+    }
+    vector<float> Metrics::conditionalEdgeWeights()
+    {
+        auto result = vector<double>();
+        auto source = vector<string>(features);
+        source.push_back(className);
+        auto combinations = doCombinations(source);
+        // Compute class prior
+        auto margin = torch::zeros({ classNumStates });
+        for (int value = 0; value < classNumStates; ++value) {
+            auto mask = samples.index({ "...", -1 }) == value;
+            margin[value] = mask.sum().item<float>() / samples.sizes()[0];
+        }
+        for (auto [first, second] : combinations) {
+            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
+            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
+            double accumulated = 0;
+            for (int value = 0; value < classNumStates; ++value) {
+                auto mask = samples.index({ "...", -1 }) == value;
+                auto first_dataset = samples.index({ mask, index_first });
+                auto second_dataset = samples.index({ mask, index_second });
+                auto mi = mutualInformation(first_dataset, second_dataset);
+                auto pb = margin[value].item<float>();
+                accumulated += pb * mi;
+            }
+            result.push_back(accumulated);
+        }
+        long n_vars = source.size();
+        auto matrix = torch::zeros({ n_vars, n_vars });
+        auto indices = torch::triu_indices(n_vars, n_vars, 1);
+        for (auto i = 0; i < result.size(); ++i) {
+            auto x = indices[0][i];
+            auto y = indices[1][i];
+            matrix[x][y] = result[i];
+            matrix[y][x] = result[i];
+        }
+        std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
+        return v;
+    }
+    double Metrics::entropy(torch::Tensor& feature)
+    {
+        torch::Tensor counts = feature.bincount();
+        int totalWeight = counts.sum().item<int>();
+        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
+        torch::Tensor logProbs = torch::log(probs);
+        torch::Tensor entropy = -probs * logProbs;
+        return entropy.nansum().item<double>();
+    }
+    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
+    double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        int numSamples = firstFeature.sizes()[0];
+        torch::Tensor featureCounts = secondFeature.bincount();
+        unordered_map<int, unordered_map<int, double>> jointCounts;
+        double totalWeight = 0;
+        for (auto i = 0; i < numSamples; i++) {
+            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
+            totalWeight += 1;
+        }
+        if (totalWeight == 0)
+            throw invalid_argument("Total weight should not be zero");
+        double entropyValue = 0;
+        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
+            double p_f = featureCounts[value].item<double>() / totalWeight;
+            double entropy_f = 0;
+            for (auto& [label, jointCount] : jointCounts[value]) {
+                double p_l_f = jointCount / featureCounts[value].item<double>();
+                if (p_l_f > 0) {
+                    entropy_f -= p_l_f * log(p_l_f);
+                } else {
+                    entropy_f = 0;
+                }
+            }
+            entropyValue += p_f * entropy_f;
+        }
+        return entropyValue;
+    }
+    // I(X;Y) = H(Y) - H(Y|X)
+    double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
+    }
+}
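For readers tracing the math: `conditionalEdgeWeights` scores every pair of variables (the features plus the class) with the class-conditional mutual information I(X;Y|C) = Σ_c p(c)·I(X;Y|C=c). A plain NumPy sketch of the same computation, for reference only (function names are illustrative, not part of the package):

```python
import numpy as np

def entropy(x):
    # H(X) from empirical frequencies
    _, counts = np.unique(x, return_counts=True)
    p = counts / counts.sum()
    return -(p * np.log(p)).sum()

def conditional_entropy(x, y):
    # H(X|Y) = sum_y p(y) H(X|Y=y)
    values, counts = np.unique(y, return_counts=True)
    p_y = counts / counts.sum()
    return sum(p * entropy(x[y == v]) for v, p in zip(values, p_y))

def mutual_information(x, y):
    # I(X;Y) = H(X) - H(X|Y), as in Metrics::mutualInformation
    return entropy(x) - conditional_entropy(x, y)

def conditional_edge_weight(x, y, c, class_num_states):
    # I(X;Y|C) = sum_c p(c) * I(X;Y|C=c), the per-pair weight above
    weight = 0.0
    for value in range(class_num_states):
        mask = c == value
        weight += mask.mean() * mutual_information(x[mask], y[mask])
    return weight
```

One caveat worth noting in the C++ above: in `conditionalEntropy` the `else { entropy_f = 0; }` branch resets the accumulated inner sum rather than skipping the zero-probability cell, although in practice the branch is unreachable because `jointCounts` only holds cells with positive counts.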
bayesclass/Metrics.hpp (new file, 24 lines)

@@ -0,0 +1,24 @@
+#ifndef BAYESNET_METRICS_H
+#define BAYESNET_METRICS_H
+#include <torch/torch.h>
+#include <vector>
+#include <string>
+using namespace std;
+namespace bayesnet {
+    class Metrics {
+    private:
+        torch::Tensor samples;
+        vector<string> features;
+        string className;
+        int classNumStates;
+        vector<pair<string, string>> doCombinations(const vector<string>&);
+        double entropy(torch::Tensor&);
+        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
+        double mutualInformation(torch::Tensor&, torch::Tensor&);
+    public:
+        Metrics(torch::Tensor&, vector<string>&, string&, int);
+        Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
+        vector<float> conditionalEdgeWeights();
+    };
+}
+#endif
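In summary, the quantities behind this interface are the standard information-theoretic ones used for TAN/KDB structure learning:

```latex
H(X) = -\sum_{x} p(x)\,\log p(x), \qquad
H(X \mid Y) = \sum_{y} p(y)\, H(X \mid Y = y),
```
```latex
I(X;Y) = H(X) - H(X \mid Y), \qquad
I(X_i; X_j \mid C) = \sum_{c} p(c)\, I(X_i; X_j \mid C = c).
```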
bayesclass/Network.cc

@@ -98,11 +98,14 @@ namespace bayesnet {
         this->className = className;
         dataset.clear();
 
-        // Build dataset
+        // Build dataset & tensor of samples
+        samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
         for (int i = 0; i < featureNames.size(); ++i) {
             dataset[featureNames[i]] = input_data[i];
+            samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
         }
         dataset[className] = labels;
+        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
         classNumStates = *max_element(labels.begin(), labels.end()) + 1;
         int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
         if (maxThreadsRunning < 1) {
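Note the layout `fit` now builds: `input_data` arrives as one vector per feature, while the `samples` tensor stores one sample per row with the labels appended as the last column, i.e. shape (n_samples, n_features + 1). In NumPy terms (toy values):

```python
import numpy as np

input_data = [[0, 1, 1, 0], [2, 0, 1, 1]]  # two features, four samples
labels = [0, 1, 1, 0]
samples = np.column_stack(input_data + [labels])
print(samples.shape)  # (4, 3): rows are samples, last column is the class
```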
@@ -150,14 +153,14 @@ namespace bayesnet {
         }
     }
 
-    vector<int> Network::predict(const vector<vector<int>>& samples)
+    vector<int> Network::predict(const vector<vector<int>>& tsamples)
     {
         vector<int> predictions;
         vector<int> sample;
-        for (int row = 0; row < samples[0].size(); ++row) {
+        for (int row = 0; row < tsamples[0].size(); ++row) {
             sample.clear();
-            for (int col = 0; col < samples.size(); ++col) {
-                sample.push_back(samples[col][row]);
+            for (int col = 0; col < tsamples.size(); ++col) {
+                sample.push_back(tsamples[col][row]);
             }
             vector<double> classProbabilities = predict_sample(sample);
             // Find the class with the maximum posterior probability
@@ -167,22 +170,22 @@ namespace bayesnet {
         }
         return predictions;
     }
-    vector<vector<double>> Network::predict_proba(const vector<vector<int>>& samples)
+    vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
     {
         vector<vector<double>> predictions;
         vector<int> sample;
-        for (int row = 0; row < samples[0].size(); ++row) {
+        for (int row = 0; row < tsamples[0].size(); ++row) {
             sample.clear();
-            for (int col = 0; col < samples.size(); ++col) {
-                sample.push_back(samples[col][row]);
+            for (int col = 0; col < tsamples.size(); ++col) {
+                sample.push_back(tsamples[col][row]);
             }
             predictions.push_back(predict_sample(sample));
         }
         return predictions;
     }
-    double Network::score(const vector<vector<int>>& samples, const vector<int>& labels)
+    double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
     {
-        vector<int> y_pred = predict(samples);
+        vector<int> y_pred = predict(tsamples);
         int correct = 0;
         for (int i = 0; i < y_pred.size(); ++i) {
             if (y_pred[i] == labels[i]) {
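The parameter rename to `tsamples` presumably avoids shadowing the new `samples` member added to `Network`. Beyond that, `Network::score` is plain accuracy; the equivalent computation for reference:

```python
import numpy as np

def score(y_pred, y_true):
    # fraction of exact label matches
    return float(np.mean(np.asarray(y_pred) == np.asarray(y_true)))
```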
@@ -238,4 +241,83 @@ namespace bayesnet {
         }
         return result;
     }
+    double Network::mutual_info(torch::Tensor& first, torch::Tensor& second)
+    {
+        return 1;
+    }
+    torch::Tensor Network::conditionalEdgeWeight()
+    {
+        auto result = vector<double>();
+        auto source = vector<string>(features);
+        source.push_back(className);
+        auto combinations = nodes[className]->combinations(source);
+        auto margin = nodes[className]->getCPT();
+        for (auto [first, second] : combinations) {
+            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
+            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
+            double accumulated = 0;
+            for (int value = 0; value < classNumStates; ++value) {
+                auto mask = samples.index({ "...", -1 }) == value;
+                auto first_dataset = samples.index({ mask, index_first });
+                auto second_dataset = samples.index({ mask, index_second });
+                auto mi = mutualInformation(first_dataset, second_dataset);
+                auto pb = margin[value].item<float>();
+                accumulated += pb * mi;
+            }
+            result.push_back(accumulated);
+        }
+        long n_vars = source.size();
+        auto matrix = torch::zeros({ n_vars, n_vars });
+        auto indices = torch::triu_indices(n_vars, n_vars, 1);
+        for (auto i = 0; i < result.size(); ++i) {
+            auto x = indices[0][i];
+            auto y = indices[1][i];
+            matrix[x][y] = result[i];
+            matrix[y][x] = result[i];
+        }
+        return matrix;
+    }
+    double Network::entropy(torch::Tensor& feature)
+    {
+        torch::Tensor counts = feature.bincount();
+        int totalWeight = counts.sum().item<int>();
+        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
+        torch::Tensor logProbs = torch::log(probs);
+        torch::Tensor entropy = -probs * logProbs;
+        return entropy.nansum().item<double>();
+    }
+    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
+    double Network::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        int numSamples = firstFeature.sizes()[0];
+        torch::Tensor featureCounts = secondFeature.bincount();
+        unordered_map<int, unordered_map<int, double>> jointCounts;
+        double totalWeight = 0;
+        for (auto i = 0; i < numSamples; i++) {
+            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
+            totalWeight += 1;
+        }
+        if (totalWeight == 0)
+            throw invalid_argument("Total weight should not be zero");
+        double entropyValue = 0;
+        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
+            double p_f = featureCounts[value].item<double>() / totalWeight;
+            double entropy_f = 0;
+            for (auto& [label, jointCount] : jointCounts[value]) {
+                double p_l_f = jointCount / featureCounts[value].item<double>();
+                if (p_l_f > 0) {
+                    entropy_f -= p_l_f * log(p_l_f);
+                } else {
+                    entropy_f = 0;
+                }
+            }
+            entropyValue += p_f * entropy_f;
+        }
+        return entropyValue;
+    }
+    // I(X;Y) = H(Y) - H(Y|X)
+    double Network::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
+    }
 }
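This block duplicates the `Metrics` logic inside `Network`, with two differences: the margin comes from the class node's CPT instead of empirical counts, and `mutual_info` is left as a stub returning 1 (the working implementation is `mutualInformation`). In both copies the per-pair weights are laid back into a symmetric matrix via `torch::triu_indices`; since `combinations`/`doCombinations` emit pairs in row-major upper-triangle order, which is exactly the order `triu_indices` enumerates, the i-th weight lands at the i-th above-diagonal cell. A NumPy mirror of that fill (hypothetical helper):

```python
import numpy as np

def to_matrix(weights, n_vars):
    m = np.zeros((n_vars, n_vars))
    iu = np.triu_indices(n_vars, k=1)  # same enumeration order as torch::triu_indices
    m[iu] = weights                    # fill above the diagonal
    m[iu[::-1]] = weights              # mirror below it
    return m
```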
bayesclass/Network.h

@@ -19,7 +19,12 @@ namespace bayesnet {
         vector<double> predict_sample(const vector<int>&);
         vector<double> exactInference(map<string, int>&);
         double computeFactor(map<string, int>&);
+        double mutual_info(torch::Tensor&, torch::Tensor&);
+        double entropy(torch::Tensor&);
+        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
+        double mutualInformation(torch::Tensor&, torch::Tensor&);
     public:
+        torch::Tensor samples;
         Network();
         Network(float, int);
         Network(float);
@@ -35,6 +40,8 @@ namespace bayesnet {
         string getClassName();
         void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
         vector<int> predict(const vector<vector<int>>&);
+        // Computes the conditional edge weight of variables u and v conditioned on class_node
+        torch::Tensor conditionalEdgeWeight();
         vector<vector<double>> predict_proba(const vector<vector<int>>&);
         double score(const vector<vector<int>>&, const vector<int>&);
         inline string version() { return "0.1.0"; }
bayesclass/Node.cc

@@ -57,23 +57,23 @@ namespace bayesnet {
     */
     unsigned Node::minFill()
     {
-        set<string> neighbors;
+        unordered_set<string> neighbors;
         for (auto child : children) {
             neighbors.emplace(child->getName());
         }
         for (auto parent : parents) {
             neighbors.emplace(parent->getName());
         }
-        return combinations(neighbors).size();
+        auto source = vector<string>(neighbors.begin(), neighbors.end());
+        return combinations(source).size();
     }
-    vector<string> Node::combinations(const set<string>& neighbors)
+    vector<pair<string, string>> Node::combinations(const vector<string>& source)
     {
-        vector<string> source(neighbors.begin(), neighbors.end());
-        vector<string> result;
+        vector<pair<string, string>> result;
         for (int i = 0; i < source.size(); ++i) {
             string temp = source[i];
             for (int j = i + 1; j < source.size(); ++j) {
-                result.push_back(temp + source[j]);
+                result.push_back({ temp, source[j] });
             }
         }
         return result;
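The refactored `Node::combinations` now returns unordered pairs of names rather than concatenated strings, which is the shape the edge-weight code consumes. In Python terms the nested `i < j` loops are simply:

```python
from itertools import combinations

source = ["a", "b", "c"]
print(list(combinations(source, 2)))
# [('a', 'b'), ('a', 'c'), ('b', 'c')] -- same order as the nested loops
```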
bayesclass/Node.h

@@ -1,7 +1,7 @@
 #ifndef NODE_H
 #define NODE_H
 #include <torch/torch.h>
-//#include <torch/extension.h>
+#include <unordered_set>
 #include <vector>
 #include <string>
 namespace bayesnet {
@@ -14,8 +14,8 @@ namespace bayesnet {
         int numStates; // number of states of the variable
         torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
         vector<int64_t> dimensions; // dimensions of the cpTable
-        vector<string> combinations(const set<string>&);
     public:
+        vector<pair<string, string>> combinations(const vector<string>&);
         Node(const std::string&, int);
         void addParent(Node*);
         void addChild(Node*);
bayesclass/_version.py

@@ -1 +1 @@
-__version__ = "0.1.1"
+__version__ = "0.2.0"
bayesclass/cppSelectFeatures.cpp (generated by Cython)

@@ -1,4 +1,4 @@
-/* Generated by Cython 0.29.35 */
+/* Generated by Cython 0.29.36 */
 
 #ifndef PY_SSIZE_T_CLEAN
 #define PY_SSIZE_T_CLEAN
@@ -9,8 +9,8 @@
 #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
 #error Cython requires Python 2.6+ or Python 3.3+.
 #else
-#define CYTHON_ABI "0_29_35"
-#define CYTHON_HEX_VERSION 0x001D23F0
+#define CYTHON_ABI "0_29_36"
+#define CYTHON_HEX_VERSION 0x001D24F0
 #define CYTHON_FUTURE_DIVISION 1
 #include <stddef.h>
 #ifndef offsetof
@@ -85,7 +85,7 @@
 #define CYTHON_PEP489_MULTI_PHASE_INIT 1
 #endif
 #undef CYTHON_USE_TP_FINALIZE
-#define CYTHON_USE_TP_FINALIZE 0
+#define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00)
 #undef CYTHON_USE_DICT_VERSIONS
 #define CYTHON_USE_DICT_VERSIONS 0
 #undef CYTHON_USE_EXC_INFO_STACK
@@ -383,9 +383,6 @@ class __Pyx_FakeReference {
     T *ptr;
 };
 
-#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
-#define Py_OptimizeFlag 0
-#endif
 #define __PYX_BUILD_PY_SSIZE_T "n"
 #define CYTHON_FORMAT_SSIZE_T "z"
 #if PY_MAJOR_VERSION < 3
@@ -463,6 +460,11 @@ class __Pyx_FakeReference {
 #endif
 #define __Pyx_DefaultClassType PyType_Type
 #endif
+#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY
+#define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o)
+#else
+#define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o)
+#endif
 #ifndef Py_TPFLAGS_CHECKTYPES
 #define Py_TPFLAGS_CHECKTYPES 0
 #endif
@@ -2601,7 +2603,7 @@ static PyObject *__pyx_tp_new_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted(
 
 static void __pyx_tp_dealloc_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted(PyObject *o) {
 #if CYTHON_USE_TP_FINALIZE
-  if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE) && Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))) {
+  if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE) && Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) {
     if (PyObject_CallFinalizerFromDealloc(o)) return;
   }
 #endif
bayesclass/clfs.py

@@ -16,7 +16,7 @@ from pgmpy.base import DAG
 import matplotlib.pyplot as plt
 from fimdlp.mdlp import FImdlp
 from .cppSelectFeatures import CSelectKBestWeighted
-from .BayesNet import BayesNetwork
+from .BayesNet import BayesNetwork, CMetrics
 from ._version import __version__
 
 
@@ -144,7 +144,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         # Store the information needed to build the model
         self.build_dataset()
         # Build the DAG
-        self._build()
+        self._build(kwargs)
         # Train the model
         self._train(kwargs)
         self.fitted_ = True
@@ -153,11 +153,14 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         # Return the classifier
         return self
 
-    def _build(self):
-        """This method should be implemented by the subclasses to
-        build the DAG
-        """
-        ...
+    def _build(self, kwargs):
+        self.model_ = BayesNetwork()
+        features = kwargs["features"]
+        states = kwargs["state_names"]
+        for feature in features:
+            self.model_.addNode(feature, len(states[feature]))
+        class_name = kwargs["class_name"]
+        self.model_.addNode(class_name, max(self.y_) + 1)
 
     def _train(self, kwargs):
         """Build and train a BayesianNetwork from the DAG and the dataset
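`_build` is no longer an abstract stub: the base class now creates the C++ `BayesNetwork` up front, adding one node per feature sized by its `state_names` entry plus the class node sized as `max(y) + 1`, which assumes labels are encoded 0..k-1. The kwargs it reads would look like this (illustrative values only):

```python
# Keys consumed by BayesBase._build; the values here are made up.
kwargs = {
    "features": ["f1", "f2", "f3"],
    "state_names": {"f1": [0, 1], "f2": [0, 1, 2], "f3": [0, 1]},
    "class_name": "class",
}
```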
@@ -178,14 +181,10 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         #     weighted=self.weighted_,
         #     **states,
         # )
-        self.model_ = BayesNetwork()
         features = kwargs["features"]
-        states = kwargs["state_names"]
-        for feature in features:
-            self.model_.addNode(feature, len(states[feature]))
         class_name = kwargs["class_name"]
-        self.model_.addNode(class_name, max(self.y_) + 1)
-        for source, destination in self.dag_.edges():
+        for source, destination in self.edges_:
             self.model_.addEdge(source, destination)
         self.model_.fit(self.X_, self.y_, features, class_name)
         self.states_computed_ = self.model_.getStates()
@@ -307,7 +306,7 @@ class TAN(BayesBase):
             raise ValueError("Head index out of range")
         return X, y
 
-    def _build(self):
+    def _build(self, kwargs):
         est = TreeSearch(
             self.dataset_, root_node=self.feature_names_in_[self.head_]
         )
@@ -360,7 +359,7 @@ class KDB(BayesBase):
         ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
-    def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
+    def _add_m_edges(self, idx, S_nodes, conditional_weights):
         n_edges = min(self.k, len(S_nodes))
         cond_w = conditional_weights.copy()
         exit_cond = self.k == 0
@@ -369,7 +368,7 @@ class KDB(BayesBase):
             max_minfo = np.argmax(cond_w[idx, :])
             if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
                 try:
-                    dag.add_edge(
+                    self.add_edge(
                         self.feature_names_in_[max_minfo],
                         self.feature_names_in_[idx],
                     )
@@ -380,7 +379,7 @@ class KDB(BayesBase):
             cond_w[idx, max_minfo] = -1
             exit_cond = num == n_edges or np.all(cond_w[idx, :] <= self.theta)
 
-    def _build(self):
+    def _build(self, kwargs):
         """
         1. For each feature Xi, compute mutual information, I(X;C),
         where C is the class.
@@ -400,14 +399,20 @@ class KDB(BayesBase):
         Compute the conditional probability inferred by the structure of BN by
         using counts from DB, and output BN.
         """
+        super()._build(kwargs)
         # 1. get the mutual information between each feature and the class
         mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
         # 2. symmetric matrix where each element represents I(X, Y| class_node)
-        conditional_weights = TreeSearch(
-            self.dataset_
-        )._get_conditional_weights(
-            self.dataset_, self.class_name_, show_progress=self.show_progress
-        )
+        metrics = CMetrics(
+            self.X_,
+            self.y_,
+            self.features_,
+            self.class_name_,
+            self.n_classes_,
+        )
+        c_weights = np.array(metrics.conditionalEdgeWeights())
+        n_var = self.n_features_in_ + 1
+        conditional_weights = np.reshape(c_weights, (n_var, n_var))
         '''
         # Step 1: Compute edge weights for a fully connected graph.
         n_vars = len(data.columns)
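KDB thus swaps pgmpy's `TreeSearch._get_conditional_weights` for the C++ metrics. The returned vector is the flattened (n_features + 1)² matrix; after the reshape, entry (i, j) is I(Xi;Xj|C) and the last row/column belongs to the class variable. A shape sketch with stand-in numbers:

```python
import numpy as np

n_features_in_ = 3
flat = np.zeros((n_features_in_ + 1) ** 2)  # stand-in for conditionalEdgeWeights()
n_var = n_features_in_ + 1
conditional_weights = np.reshape(flat, (n_var, n_var))
pair_weight = conditional_weights[0, 1]     # weight between X0 and X1
class_weight = conditional_weights[0, -1]   # weight between X0 and the class
```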
@@ -442,18 +447,15 @@ class KDB(BayesBase):
         # 3. Let the used variable list, S, be empty.
         S_nodes = []
         # 4. Let the DAG being constructed, BN, begin with a single class node
-        dag = BayesianNetwork()
-        dag.add_node(self.class_name_)  # , state_names=self.classes_)
         # 5. Repeat until S includes all domain features
         # 5.1 Select feature Xmax which is not in S and has the largest value
         for idx in np.argsort(mutual):
             # 5.2 Add a node to BN representing Xmax.
             feature = self.feature_names_in_[idx]
-            dag.add_node(feature)
             # 5.3 Add an arc from C to Xmax in BN.
-            dag.add_edge(self.class_name_, feature)
+            self.edges_.append((self.class_name_, feature))
             # 5.4 Add m = min(|S|, k) arcs from m distinct features Xj in S
-            self._add_m_edges(dag, idx, S_nodes, conditional_weights)
+            self._add_m_edges(idx, S_nodes, conditional_weights)
             # 5.5 Add Xmax to S.
             S_nodes.append(idx)
         self.dag_ = dag
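The DAG is now accumulated as an edge list (`self.edges_`) instead of a pgmpy `BayesianNetwork`; note, though, that the hunk still leaves `self.dag_ = dag` as context even though the `dag` object is gone, so a follow-up fix looks needed there. For orientation, a rough Python reconstruction of the parent selection `_add_m_edges` performs per new feature, pieced together from the fragments above (illustrative, simplified to append to a plain edge list):

```python
import numpy as np

def add_m_edges(edges, feature_names, idx, S_nodes, conditional_weights, k, theta):
    # Pick up to min(k, |S|) parents for feature idx, by descending
    # conditional weight, skipping candidates at or below theta.
    cond_w = conditional_weights.copy()
    n_edges = min(k, len(S_nodes))
    num = 0
    exit_cond = k == 0
    while not exit_cond:
        max_minfo = np.argmax(cond_w[idx, :])
        if max_minfo in S_nodes and cond_w[idx, max_minfo] > theta:
            edges.append((feature_names[max_minfo], feature_names[idx]))
            num += 1
        cond_w[idx, max_minfo] = -1  # never pick the same candidate twice
        exit_cond = num == n_edges or np.all(cond_w[idx, :] <= theta)
```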
@@ -851,7 +853,7 @@ class BoostSPODE(BayesBase):
         ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
-    def _build(self):
+    def _build(self, _):
         class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
         feature_edges = [
             (self.sparent_, f)