Remove unoptimized implementation of conditionalEntropy
This commit is contained in:
@@ -177,6 +177,8 @@ namespace bayesnet {
|
||||
|
||||
// Total weight sum
|
||||
double totalWeight = torch::sum(weights).item<double>();
|
||||
if (totalWeight == 0)
|
||||
return 0;
|
||||
|
||||
// Compute the conditional entropy
|
||||
double conditionalEntropy = 0.0;
|
||||
@@ -192,63 +194,8 @@ namespace bayesnet {
|
||||
conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
|
||||
}
|
||||
}
|
||||
|
||||
return conditionalEntropy;
|
||||
}
|
||||
/// @brief Weighted conditional entropy H(Y | X, C), computed sample by sample.
///
/// Every sample i contributes weights[i] to the cell (x, c, y) where
/// x = firstFeature[i], y = secondFeature[i] and c = labels[i]. The result is
///     sum over (x, c) of p(x, c) * ( - sum over y of p(y | x, c) * log p(y | x, c) )
/// using the natural logarithm.
///
/// @param firstFeature  Conditioning variable X; read element-wise as int.
/// @param secondFeature Variable Y whose entropy is measured; read element-wise as int.
/// @param labels        Conditioning variable C; read element-wise as int.
/// @param weights       Per-sample weights; read element-wise as double.
/// @return The conditional entropy, or 0 when the accumulated weight is exactly 0.
///
/// NOTE(review): assumes the four tensors are 1-D, hold integer-coded values
/// and have at least firstFeature.size(0) elements each — confirm with callers.
double Metrics::conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
{
    auto numSamples = firstFeature.size(0);

    // Accumulated weight of every y value, grouped by its (x, c) context.
    std::map<std::pair<int, int>, std::map<int, double>> weightByContext;
    double totalWeight = 0.0;
    for (decltype(numSamples) i = 0; i < numSamples; ++i) {
        const int x = firstFeature[i].item<int>();
        const int y = secondFeature[i].item<int>();
        const int c = labels[i].item<int>();
        // Read the weight once, as double, so the joint counts and the total
        // are accumulated with the same precision.
        const double w = weights[i].item<double>();
        weightByContext[std::make_pair(x, c)][y] += w;
        totalWeight += w;
    }
    if (totalWeight == 0)
        return 0;

    double entropyValue = 0.0;
    for (const auto& [context, weightByY] : weightByContext) {
        // Un-normalised mass of this (x, c) context.
        double contextWeight = 0.0;
        for (const auto& [y, w] : weightByY) {
            contextWeight += w;
        }
        // Contexts without mass contribute nothing to the entropy.
        if (contextWeight <= 0)
            continue;

        double contextEntropy = 0.0;
        for (const auto& [y, w] : weightByY) {
            // p(y | x, c) is a ratio of raw weights; the total weight cancels
            // out, so it must not appear in this denominator.
            const double p_y_given_xc = w / contextWeight;
            if (p_y_given_xc > 0) {
                contextEntropy -= p_y_given_xc * std::log(p_y_given_xc);
            }
        }
        // Weight the per-context entropy by p(x, c).
        entropyValue += (contextWeight / totalWeight) * contextEntropy;
    }
    return entropyValue;
}
|
||||
// I(X;Y) = H(Y) - H(Y|X)
|
||||
double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
|
||||
{
|
||||
|
@@ -25,7 +25,6 @@ namespace bayesnet {
|
||||
// Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
|
||||
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
|
||||
double conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
|
||||
protected:
|
||||
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
|
||||
std::string className;
|
||||
|
Reference in New Issue
Block a user