Optimize BoostAODE -> XBAODE #33
```diff
@@ -93,36 +93,42 @@ namespace bayesnet {
     void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
     {
         dimensions.clear();
+        dimensions.reserve(parents.size() + 1);
         // Get dimensions of the CPT
         dimensions.push_back(numStates);
-        transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
-        // Create a tensor of zeros with the dimensions of the CPT
-        cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
-        // Fill table with counts
-        auto pos = find(features.begin(), features.end(), name);
-        if (pos == features.end()) {
-            throw std::logic_error("Feature " + name + " not found in dataset");
-        }
-        int name_index = pos - features.begin();
+        for (const auto& parent : parents) {
+            dimensions.push_back(parent->getNumStates());
+        }
+        //transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
+        // Create a tensor initialized with smoothing
+        cpTable = torch::full(dimensions, smoothing, torch::kDouble);
+        // Create a map for quick feature index lookup
+        std::unordered_map<std::string, int> featureIndexMap;
+        for (size_t i = 0; i < features.size(); ++i) {
+            featureIndexMap[features[i]] = i;
+        }
+        // Fill table with counts
+        // Get the index of this node's feature
+        int name_index = featureIndexMap[name];
+        // Get parent indices in dataset
+        std::vector<int> parent_indices;
+        parent_indices.reserve(parents.size());
+        for (const auto& parent : parents) {
+            parent_indices.push_back(featureIndexMap[parent->getName()]);
+        }
         c10::List<c10::optional<at::Tensor>> coordinates;
         for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
             coordinates.clear();
             auto sample = dataset.index({ "...", n_sample });
             coordinates.push_back(sample[name_index]);
-            for (auto parent : parents) {
-                pos = find(features.begin(), features.end(), parent->getName());
-                if (pos == features.end()) {
-                    throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
-                }
-                int parent_index = pos - features.begin();
-                coordinates.push_back(sample[parent_index]);
+            for (size_t i = 0; i < parent_indices.size(); ++i) {
+                coordinates.push_back(sample[parent_indices[i]]);
             }
             // Increment the count of the corresponding coordinate
             cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
         }
-        // Normalize the counts
-        // Divide each row by the sum of the row
-        cpTable = cpTable / cpTable.sum(0);
+        // Normalize the counts (dividing each row by the sum of the row)
+        cpTable /= cpTable.sum(0, true);
     }
     double Node::getFactorValue(std::map<std::string, int>& evidence)
     {
```
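The `computeCPT` hunk makes two changes: the CPT tensor is now created in one step with `torch::full` instead of `torch::zeros(...) + smoothing`, and the repeated `std::find` scans over `features` (one for the node itself, plus one per parent inside every sample iteration) are replaced by a hash map built once before the counting loop. A minimal standalone sketch of that lookup pattern, with hypothetical names (this is not the library's API):

```cpp
// Minimal sketch of the lookup optimization: a hash map built once turns each
// O(n) std::find over the feature list into an O(1) lookup inside the
// per-sample loop.
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::vector<std::string> features = { "sepallength", "sepalwidth", "petallength", "class" };

    // Build the index map once, before iterating over samples.
    std::unordered_map<std::string, int> featureIndexMap;
    for (size_t i = 0; i < features.size(); ++i) {
        featureIndexMap[features[i]] = static_cast<int>(i);
    }

    // Each lookup is now constant time instead of a linear scan per sample.
    auto it = featureIndexMap.find("petallength");
    if (it == featureIndexMap.end()) {
        throw std::logic_error("Feature petallength not found in dataset");
    }
    std::cout << "petallength -> column " << it->second << "\n";
    return 0;
}
```

Two details worth noting. The new code indexes the map with `operator[]` (`featureIndexMap[name]`), which silently inserts a zero entry for an unknown name, so the old `logic_error` path is lost; the sketch uses `find` to preserve that check. And `cpTable.sum(0, true)` passes `keepdim = true`, which keeps the reduced dimension so the division broadcasts over rows explicitly.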
```diff
@@ -18,7 +18,7 @@ include_directories(
     ../tests/lib/Files
     lib/json/include
     /usr/local/include
-    ${FImdlp_INCLUDE_DIRS}
+    /usr/local/include/fimdlp/
 )

 add_executable(bayesnet_sample sample.cc)
```
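The build change swaps the `${FImdlp_INCLUDE_DIRS}` variable (presumably populated by a `find_package` or similar lookup) for the hard-coded path `/usr/local/include/fimdlp/`. That removes the dependency on the lookup succeeding, at the cost of assuming fimdlp is installed under `/usr/local`; builds with the library installed elsewhere would need the path adjusted.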
```diff
@@ -60,7 +60,21 @@ int main(int argc, char* argv[])
     auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
     std::cout << "Library version: " << clf.getVersion() << std::endl;
     tie(X, y, features, className, states) = loadDataset(file_name, true);
-    clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
+    torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
+    torch::Tensor dataset;
+    try {
+        auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
+        dataset = torch::cat({ X, yresized }, 0);
+    }
+    catch (const std::exception& e) {
+        std::stringstream oss;
+        oss << "* Error in X and y dimensions *\n";
+        oss << "X dimensions: " << dataset.sizes() << "\n";
+        oss << "y dimensions: " << y.sizes();
+        throw std::runtime_error(oss.str());
+    }
+    //Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
+    clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
     auto score = clf.score(X, y);
     std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
     return 0;
```
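The sample now exercises the weighted `fit` overload directly: it builds an explicit per-sample weight tensor and assembles the combined dataset itself by stacking the labels under the feature matrix. A minimal sketch of that assembly (assuming libtorch is available; the shapes are illustrative, not taken from the repository):

```cpp
// Sketch of the dataset assembly the sample performs: y is reshaped to a
// 1-row matrix and stacked under X, so the class labels become the last row
// of an (n_features + 1) x n_samples matrix.
#include <torch/torch.h>
#include <iostream>

int main() {
    auto X = torch::randint(0, 3, { 4, 10 }, torch::kInt32);  // 4 features x 10 samples
    auto y = torch::randint(0, 2, { 10 }, torch::kInt32);     // 10 class labels

    // view({n, 1}) then transpose gives a 1 x n row vector matching X's layout.
    auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
    auto dataset = torch::cat({ X, yresized }, 0);            // 5 x 10

    // Uniform per-sample weights, one per column, as in the sample code.
    auto weights = torch::full({ X.size(1) }, 15, torch::kDouble);

    std::cout << "dataset: " << dataset.sizes() << " weights: " << weights.sizes() << "\n";
    return 0;
}
```

Note that `torch::cat` requires matching dtypes across its inputs, and that the catch block in the hunk prints `dataset.sizes()` even though `dataset` is only assigned by the `torch::cat` call that just failed, so the message will not report the offending dimensions as intended; printing `X.sizes()` would be more informative.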