Complete CFS tested with Python mufs
This commit is contained in:
parent
40d1dad5d8
commit
5022a4dc90
18
.vscode/c_cpp_properties.json
vendored
Normal file
18
.vscode/c_cpp_properties.json
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Mac",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**"
|
||||
],
|
||||
"defines": [],
|
||||
"macFrameworkPath": [
|
||||
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
|
||||
],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
@ -60,24 +60,13 @@ namespace bayesnet {
|
||||
{
|
||||
return scoresKBest;
|
||||
}
|
||||
template <class T>
|
||||
vector<pair<T, T>> Metrics::doCombinations(const vector<T>& source)
|
||||
{
|
||||
vector<pair<T, T>> result;
|
||||
for (int i = 0; i < source.size(); ++i) {
|
||||
T temp = source[i];
|
||||
for (int j = i + 1; j < source.size(); ++j) {
|
||||
result.push_back({ temp, source[j] });
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
|
||||
{
|
||||
auto result = vector<double>();
|
||||
auto source = vector<string>(features);
|
||||
source.push_back(className);
|
||||
auto combinations = doCombinations<string>(source);
|
||||
auto combinations = doCombinations(source);
|
||||
// Compute class prior
|
||||
auto margin = torch::zeros({ classNumStates }, torch::kFloat);
|
||||
for (int value = 0; value < classNumStates; ++value) {
|
||||
@ -123,6 +112,11 @@ namespace bayesnet {
|
||||
torch::Tensor counts = feature.bincount(weights);
|
||||
double totalWeight = counts.sum().item<double>();
|
||||
torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
|
||||
// cout << "Probs: ";
|
||||
// for (int i = 0; i < probs.size(0); ++i) {
|
||||
// cout << probs[i].item<double>() << ", ";
|
||||
// }
|
||||
// cout << endl;
|
||||
torch::Tensor logProbs = torch::log(probs);
|
||||
torch::Tensor entropy = -probs * logProbs;
|
||||
return entropy.nansum().item<double>();
|
||||
|
@ -18,7 +18,17 @@ namespace bayesnet {
|
||||
double entropy(const Tensor& feature, const Tensor& weights);
|
||||
vector<string> features;
|
||||
/**
 * Build every unordered pair (source[i], source[j]) with i < j.
 *
 * Pairs are produced in index order: (0,1), (0,2), ..., (1,2), ...
 * An input with fewer than two elements yields an empty vector.
 *
 * @tparam T element type; must be copy-constructible.
 * @param source values to combine two by two.
 * @return vector holding the n * (n - 1) / 2 pairs, n being source.size().
 */
template <class T>
std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
{
    // Index with the container's own unsigned size type so the loop bounds
    // are not compared against a signed int.
    using Index = typename std::vector<T>::size_type;
    const Index count = source.size();
    std::vector<std::pair<T, T>> result;
    if (count > 1) {
        // The number of pairs is known up front: allocate once.
        result.reserve(count * (count - 1) / 2);
    }
    for (Index i = 0; i < count; ++i) {
        for (Index j = i + 1; j < count; ++j) {
            result.emplace_back(source[i], source[j]);
        }
    }
    return result;
}
|
||||
public:
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
||||
|
@ -2,11 +2,11 @@
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
#include "BoostAODE.h"
|
||||
#include "BayesMetrics.h"
|
||||
#include "Colors.h"
|
||||
#include "Folding.h"
|
||||
#include "Paths.h"
|
||||
#include <openssl/evp.h>
|
||||
#include "CFS.h"
|
||||
|
||||
namespace bayesnet {
|
||||
BoostAODE::BoostAODE() : Ensemble() {}
|
||||
@ -98,13 +98,15 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
output += "]";
|
||||
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
int maxFeatures = 0;
|
||||
auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||
// std::size_t str_hash = std::hash<std::string>{}(output);
|
||||
string str_hash = sha256(output);
|
||||
stringstream oss;
|
||||
oss << platform::Paths::cfs() << str_hash << ".json";
|
||||
string name = oss.str();
|
||||
ifstream file(name);
|
||||
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
if (file.is_open()) {
|
||||
nlohmann::json cfsFeatures = nlohmann::json::parse(file);
|
||||
file.close();
|
||||
|
@ -17,14 +17,22 @@ namespace bayesnet {
|
||||
*/
|
||||
auto x = samples.index({ a, "..." });
|
||||
auto y = samples.index({ b, "..." });
|
||||
return 2.0 * mutualInformation(y, x, weights) / (entropy(x, weights) + entropy(y, weights));
|
||||
auto mu = mutualInformation(x, y, weights);
|
||||
// cout << "Mutual Information: (" << a << ", " << b << ") = " << mu << endl;
|
||||
auto hx = entropy(x, weights);
|
||||
// cout << "Entropy X: " << hx << endl;
|
||||
auto hy = entropy(y, weights);
|
||||
// cout << "Entropy Y: " << hy << endl;
|
||||
return 2.0 * mu / (hx + hy);
|
||||
}
|
||||
void CFS::computeSuLabels()
|
||||
{
|
||||
// Compute Simmetrical Uncertainty between features and labels
|
||||
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
|
||||
// cout << "SuLabels" << endl;
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
suLabels[i] = symmetricalUncertainty(i, -1);
|
||||
suLabels.push_back(symmetricalUncertainty(i, -1));
|
||||
// cout << i << " -> " << suLabels[i] << endl;
|
||||
}
|
||||
|
||||
}
|
||||
@ -44,7 +52,7 @@ namespace bayesnet {
|
||||
}
|
||||
double rff = 0;
|
||||
int n = cfsFeatures.size();
|
||||
for (const auto& item : doCombinations<int>(cfsFeatures)) {
|
||||
for (const auto& item : doCombinations(cfsFeatures)) {
|
||||
rff += computeSuFeatures(item.first, item.second);
|
||||
}
|
||||
return rcf / sqrt(n + (n * n - n) * rff);
|
||||
@ -58,25 +66,58 @@ namespace bayesnet {
|
||||
auto feature = featureOrder[0];
|
||||
cfsFeatures.push_back(feature);
|
||||
cfsScores.push_back(suLabels[feature]);
|
||||
cfsFeatures.erase(cfsFeatures.begin());
|
||||
while (continueCondition) {
|
||||
double merit = numeric_limits<double>::lowest();
|
||||
int bestFeature = -1;
|
||||
for (auto feature : featureOrder) {
|
||||
cfsFeatures.push_back(feature);
|
||||
auto meritNew = computeMerit(); // Compute merit with cfsFeatures
|
||||
//cout << "MeritNew: " << meritNew << " Merit: " << merit << endl;
|
||||
if (meritNew > merit) {
|
||||
merit = meritNew;
|
||||
bestFeature = feature;
|
||||
}
|
||||
cfsFeatures.pop_back();
|
||||
}
|
||||
if (bestFeature == -1) {
|
||||
throw runtime_error("Feature not found");
|
||||
}
|
||||
cfsFeatures.push_back(bestFeature);
|
||||
cfsScores.push_back(merit);
|
||||
featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), feature), featureOrder.end());
|
||||
featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end());
|
||||
continueCondition = computeContinueCondition(featureOrder);
|
||||
}
|
||||
fitted = true;
|
||||
}
|
||||
void CFS::test()
|
||||
{
|
||||
cout << "H(y): " << entropy(samples.index({ -1, "..." }), weights) << endl;
|
||||
cout << "y: ";
|
||||
auto y = samples.index({ -1, "..." });
|
||||
for (int i = 0; i < y.size(0); ++i) {
|
||||
cout << y[i].item<double>() << ", ";
|
||||
}
|
||||
cout << endl;
|
||||
computeSuLabels();
|
||||
// cout << "Probabilites of features: " << endl;
|
||||
// for (const auto& featureName : features) {
|
||||
// int featureIdx = find(features.begin(), features.end(), featureName) - features.begin();
|
||||
// cout << featureName << "(" << featureIdx << "): ";
|
||||
// auto feature = samples.index({ featureIdx, "..." });
|
||||
// torch::Tensor counts = feature.bincount(weights);
|
||||
// double totalWeight = counts.sum().item<double>();
|
||||
// torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
|
||||
// for (int i = 0; i < probs.size(0); ++i) {
|
||||
// cout << probs[i].item<double>() << ", ";
|
||||
// }
|
||||
// cout << endl;
|
||||
// // for (int i = 0; i < x.size(0); ++i) {
|
||||
// // cout << x[i].item<double>() << ", ";
|
||||
// // }
|
||||
// // cout << endl;
|
||||
// }
|
||||
}
|
||||
bool CFS::computeContinueCondition(const vector<int>& featureOrder)
|
||||
{
|
||||
if (cfsFeatures.size() == maxFeatures || featureOrder.size() == 0) {
|
||||
|
@ -11,6 +11,7 @@ namespace bayesnet {
|
||||
CFS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights);
|
||||
virtual ~CFS() {};
|
||||
void fit();
|
||||
void test();
|
||||
vector<int> getFeatures() const;
|
||||
vector<double> getScores() const;
|
||||
private:
|
||||
|
@ -9,7 +9,7 @@ add_executable(b_main main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Mo
|
||||
add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
|
||||
add_executable(b_list list.cc Datasets.cc Dataset.cc)
|
||||
add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc)
|
||||
add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc)
|
||||
add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc )
|
||||
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
||||
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
|
||||
target_link_libraries(b_manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "Network.h"
|
||||
#include "ArffFiles.h"
|
||||
#include "CPPFImdlp.h"
|
||||
#include "CFS.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace platform;
|
||||
@ -191,22 +192,43 @@ int main()
|
||||
// }
|
||||
// cout << "***********************************************************************************************" << endl;
|
||||
// }
|
||||
const string file_name = "iris";
|
||||
auto net = bayesnet::Network();
|
||||
// const string file_name = "iris";
|
||||
// auto net = bayesnet::Network();
|
||||
// auto dt = Datasets(true, "Arff");
|
||||
// auto raw = RawDatasets("iris", true);
|
||||
// auto [X, y] = dt.getVectors(file_name);
|
||||
// cout << "Dataset dims " << raw.dataset.sizes() << endl;
|
||||
// cout << "weights dims " << raw.weights.sizes() << endl;
|
||||
// cout << "States dims " << raw.statest.size() << endl;
|
||||
// cout << "features: ";
|
||||
// for (const auto& feature : raw.featurest) {
|
||||
// cout << feature << ", ";
|
||||
// net.addNode(feature);
|
||||
// }
|
||||
// net.addNode(raw.classNamet);
|
||||
// cout << endl;
|
||||
// net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest);
|
||||
auto dt = Datasets(true, "Arff");
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto [X, y] = dt.getVectors(file_name);
|
||||
cout << "Dataset dims " << raw.dataset.sizes() << endl;
|
||||
cout << "weights dims " << raw.weights.sizes() << endl;
|
||||
cout << "States dims " << raw.statest.size() << endl;
|
||||
cout << "features: ";
|
||||
for (const auto& feature : raw.featurest) {
|
||||
cout << feature << ", ";
|
||||
net.addNode(feature);
|
||||
for (const auto& name : dt.getNames()) {
|
||||
//for (const auto& name : { "iris" }) {
|
||||
auto [X, y] = dt.getTensors(name);
|
||||
auto features = dt.getFeatures(name);
|
||||
auto states = dt.getStates(name);
|
||||
auto className = dt.getClassName(name);
|
||||
int maxFeatures = 0;
|
||||
auto classNumStates = states.at(className).size();
|
||||
torch::Tensor weights = torch::full({ X.size(1) }, 1.0 / X.size(1), torch::kDouble);
|
||||
auto dataset = X;
|
||||
auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
|
||||
dataset = torch::cat({ dataset, yresized }, 0);
|
||||
auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights);
|
||||
cfs.fit();
|
||||
cout << "Dataset: " << name << " CFS features: ";
|
||||
for (const auto& feature : cfs.getFeatures()) {
|
||||
cout << feature << ", ";
|
||||
}
|
||||
cout << "end." << endl;
|
||||
}
|
||||
net.addNode(raw.classNamet);
|
||||
cout << endl;
|
||||
net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest);
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user