mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-16 08:05:57 +00:00
Begin cython structure
This commit is contained in:
7
CMakeLists.txt
Normal file
7
CMakeLists.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
project(feature CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_BUILD_TYPE Debug)

# fix: this commit adds the source at bayesclass/FeatureSelect.cpp,
# not bayesclass/cpp/FeatureSelect.cpp — the old path does not exist.
add_executable(feature bayesclass/FeatureSelect.cpp)
|
@@ -1 +1,2 @@
|
||||
include README.md LICENSE
|
||||
include README.md LICENSE
|
||||
include bayesclass/FeatureSelect.h
|
4
Makefile
4
Makefile
@@ -16,6 +16,10 @@ lint: ## Lint and static-check
|
||||
flake8 bayesclass
|
||||
mypy bayesclass
|
||||
|
||||
# fix: configure from the repo root (where CMakeLists.txt lives) — the old
# `cmake -B build feature` pointed at a non-existent `feature` source dir —
# and actually run the build after configuring.
feature: ## compile FeatureSelect
	cmake -S . -B build
	cmake --build build
|
||||
|
||||
|
||||
push: ## Push code with tags
|
||||
git push && git push --tags
|
||||
|
||||
|
7
bayesclass/CMakeLists.txt
Normal file
7
bayesclass/CMakeLists.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
project(feature CXX)

# fix: use C++17 to match the root CMakeLists.txt and the -std=c++17 flag
# passed by setup.py; this file previously said 11.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_BUILD_TYPE Debug)

add_executable(feature FeatureSelect.cpp)
|
134
bayesclass/FeatureSelect.cpp
Normal file
134
bayesclass/FeatureSelect.cpp
Normal file
@@ -0,0 +1,134 @@
|
||||
#include "FeatureSelect.h"
|
||||
#include <iostream>
|
||||
namespace features {
|
||||
// SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k)
|
||||
// : samples(samples), labels(labels), weights(weights), k(k)
|
||||
// {
|
||||
// // if (samples.size() == 0 || samples[0].size() == 0)
|
||||
// // throw invalid_argument("features must be a non-empty matrix");
|
||||
// // if (samples.size() != labels.size())
|
||||
// // throw invalid_argument("number of samples and labels must be equal");
|
||||
// // if (samples.size() != weights.size())
|
||||
// // throw invalid_argument("number of samples and weights must be equal");
|
||||
// // if (k < 1 || k > static_cast<int>(samples[0].size()))
|
||||
// // throw invalid_argument("k must be between 1 and number of features");
|
||||
// numFeatures = 0;
|
||||
// numClasses = 0;
|
||||
// numSamples = 0;
|
||||
// fitted = false;
|
||||
// }
|
||||
SelectKBestWeighted::SelectKBestWeighted(samples_t& samples) : samples(samples) {}
|
||||
void SelectKBestWeighted::SelectKBestWeighted::fit()
|
||||
{
|
||||
// auto labelsCopy = labels;
|
||||
numFeatures = 0;//samples[0].size();
|
||||
numSamples = samples.size();
|
||||
// sort(labelsCopy.begin(), labelsCopy.end());
|
||||
// auto last = unique(labelsCopy.begin(), labelsCopy.end());
|
||||
// labelsCopy.erase(last, labelsCopy.end());
|
||||
// numClasses = labelsCopy.size();
|
||||
// score.reserve(numFeatures);
|
||||
// for (int i = 0; i < numFeatures; ++i) {
|
||||
// score.push_back(MutualInformation(i));
|
||||
// }
|
||||
outputValues();
|
||||
fitted = true;
|
||||
}
|
||||
void SelectKBestWeighted::outputValues()
|
||||
{
|
||||
cout << "numFeatures: " << numFeatures << endl;
|
||||
// cout << "numClasses: " << numClasses << endl;
|
||||
cout << "numSamples: " << numSamples << endl;
|
||||
// cout << "k: " << k << endl;
|
||||
// cout << "weights: ";
|
||||
// for (auto item : weights)
|
||||
// cout << item << ", ";
|
||||
// cout << "end." << endl;
|
||||
// cout << "labels: ";
|
||||
// for (auto item : labels)
|
||||
// cout << item << ", ";
|
||||
// cout << "end." << endl;
|
||||
cout << "samples: ";
|
||||
for (auto item : samples) {
|
||||
// for (auto item2 : item)
|
||||
// cout << item2 << ", ";
|
||||
// cout << "end." << endl;
|
||||
cout << item << ", ";
|
||||
}
|
||||
cout << "end." << endl;
|
||||
}
|
||||
// precision_t SelectKBestWeighted::entropyLabel()
|
||||
// {
|
||||
// return entropy(labels);
|
||||
// }
|
||||
// precision_t SelectKBestWeighted::entropy(const sample_t& data)
|
||||
// {
|
||||
// precision_t p;
|
||||
// precision_t ventropy = 0, totalWeight = 0;
|
||||
// score_t counts(numClasses + 1, 0);
|
||||
// for (auto i = 0; i < data.size(); ++i) {
|
||||
// counts[data[i]] += weights[i];
|
||||
// totalWeight += weights[i];
|
||||
// }
|
||||
// for (auto count : counts) {
|
||||
// p = count / totalWeight;
|
||||
// ventropy -= p * log2(p);
|
||||
// }
|
||||
// return ventropy;
|
||||
// }
|
||||
// // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
|
||||
// precision_t SelectKBestWeighted::conditionalEntropy(const int feature)
|
||||
// {
|
||||
// unordered_map<value_t, precision_t> featureCounts;
|
||||
// unordered_map<value_t, unordered_map<value_t, precision_t>> jointCounts;
|
||||
// featureCounts.clear();
|
||||
// jointCounts.clear();
|
||||
// auto totalWeight = 0;
|
||||
// for (auto i = 0; i < numSamples; i++) {
|
||||
// featureCounts[samples[feature][i]] += weights[i];
|
||||
// jointCounts[samples[feature][i]][labels[i]] += weights[i];
|
||||
// totalWeight += weights[i];
|
||||
// }
|
||||
// precision_t entropy = 0;
|
||||
// for (auto& [f, count] : featureCounts) {
|
||||
// auto p_f = count / totalWeight;
|
||||
// precision_t entropy_f = 0;
|
||||
// for (auto& [l, jointCount] : jointCounts[f]) {
|
||||
// auto p_l_f = jointCount / totalWeight;
|
||||
// entropy_f -= p_l_f * log2(p_l_f);
|
||||
// }
|
||||
// entropy += p_f * entropy_f;
|
||||
// }
|
||||
// return entropy;
|
||||
// }
|
||||
|
||||
// // I(X;Y) = H(Y) - H(Y|X)
|
||||
// precision_t SelectKBestWeighted::MutualInformation(const int i)
|
||||
// {
|
||||
// // return entropyLabel() - conditionalEntropy(i);
|
||||
// return 25 / (i + 1);
|
||||
// }
|
||||
score_t SelectKBestWeighted::getScore() const
|
||||
{
|
||||
if (!fitted)
|
||||
throw logic_error("score not fitted");
|
||||
return score;
|
||||
}
|
||||
}
|
||||
|
||||
// using namespace std;
|
||||
|
||||
// int main()
|
||||
// {
|
||||
// vector<vector<int>> samples = { {1, 2, 3}, {4, 5, 6}, {7, 8, 9} };
|
||||
// vector<int> labels = { 1, 2, 1 };
|
||||
// vector<float> weights = { 0.1, 0.7, 0.2 };
|
||||
// int k = 3;
|
||||
// auto metric = features::SelectKBestWeighted(samples, labels, weights, k);
|
||||
// metric.fit();
|
||||
// cout << "score: ";
|
||||
// for (auto item : metric.getScore())
|
||||
// cout << item << ", ";
|
||||
// cout << "end." << endl;
|
||||
// return 0;
|
||||
// }
|
38
bayesclass/FeatureSelect.h
Normal file
38
bayesclass/FeatureSelect.h
Normal file
@@ -0,0 +1,38 @@
|
||||
#ifndef SELECT_K_BEST_WEIGHTED_H
#define SELECT_K_BEST_WEIGHTED_H
#include <map>
#include <vector>
#include <string>
// NOTE(review): a using-directive in a header leaks std into every includer;
// kept for now because FeatureSelect.cpp relies on it for unqualified
// cout/endl/logic_error — remove once the .cpp qualifies its names.
using namespace std;
namespace features {
    typedef float precision_t;
    typedef int value_t;
    typedef vector<value_t> sample_t;
    // typedef vector<sample_t> samples_t;  // original 2-D shape, temporarily flattened
    typedef vector<value_t> samples_t;
    typedef vector<value_t> labels_t;
    typedef vector<precision_t> score_t, weights_t;

    // Prototype of a weighted SelectKBest feature selector; the scoring
    // machinery is being rebuilt around the Cython bridge.
    class SelectKBestWeighted {
    private:
        samples_t& samples;  // borrowed, not owned — caller keeps it alive
        // fix: in-class initialisers so getScore() can never read an
        // indeterminate `fitted` (the current ctor did not set these).
        int numFeatures = 0, numClasses = 0, numSamples = 0;
        bool fitted = false;
        score_t score;
        void outputValues();  // debug dump to stdout
    public:
        SelectKBestWeighted(samples_t&);
        void fit();
        score_t getScore() const;  // throws logic_error if called before fit()
        static inline string version() { return "0.1.0"; }
    };
}
#endif
|
37
bayesclass/FeatureTest.cpp
Normal file
37
bayesclass/FeatureTest.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
#include "FeatureTest.h"
|
||||
#include <iostream>
|
||||
namespace featuresTest {
|
||||
SelectKBest::SelectKBest(vector<int>& samples) : samples(samples) {}
|
||||
SelectKBest::SelectKBest() = default;
|
||||
SelectKBest::~SelectKBest() = default;
|
||||
void SelectKBest::SelectKBest::fit()
|
||||
{
|
||||
numFeatures = 0;
|
||||
numSamples = samples.size();
|
||||
outputValues();
|
||||
fitted = true;
|
||||
}
|
||||
void SelectKBest::outputValues()
|
||||
{
|
||||
cout << "numFeatures: " << numFeatures << endl;
|
||||
// cout << "numClasses: " << numClasses << endl;
|
||||
cout << "numSamples: " << numSamples << endl;
|
||||
// cout << "k: " << k << endl;
|
||||
// cout << "weights: ";
|
||||
// for (auto item : weights)
|
||||
// cout << item << ", ";
|
||||
// cout << "end." << endl;
|
||||
// cout << "labels: ";
|
||||
// for (auto item : labels)
|
||||
// cout << item << ", ";
|
||||
// cout << "end." << endl;
|
||||
cout << "samples: ";
|
||||
for (auto item : samples) {
|
||||
// for (auto item2 : item)
|
||||
// cout << item2 << ", ";
|
||||
// cout << "end." << endl;
|
||||
cout << item << ", ";
|
||||
}
|
||||
cout << "end." << endl;
|
||||
}
|
||||
}
|
30
bayesclass/FeatureTest.h
Normal file
30
bayesclass/FeatureTest.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef SELECT_K_BEST_TEST_H
#define SELECT_K_BEST_TEST_H

#include <map>
#include <string>
#include <vector>

using namespace std;

namespace featuresTest {
    // Scalar and container aliases shared by the feature-selection prototypes.
    using precision_t = float;
    using value_t = int;
    using sample_t = vector<value_t>;
    // using samples_t = vector<sample_t>;  // original 2-D shape, temporarily flattened
    using samples_t = vector<value_t>;
    using labels_t = vector<value_t>;
    using score_t = vector<precision_t>;
    using weights_t = vector<precision_t>;

    // Minimal stand-in class used to exercise the Cython bridge.
    class SelectKBest {
    private:
        vector<int>& samples;  // borrowed, not owned
        int numFeatures, numClasses, numSamples;
        bool fitted;
        void outputValues();  // debug dump to stdout
    public:
        SelectKBest();
        SelectKBest(vector<int>&);
        ~SelectKBest();
        void fit();
        static inline string version() { return "0.1.0"; }
    };
}
#endif
|
1
bayesclass/cSelectFeatures.c
Normal file
1
bayesclass/cSelectFeatures.c
Normal file
@@ -0,0 +1 @@
|
||||
/* Tombstone left by a failed Cython run: the build must use the C++ output
 * (cSelectFeatures.cpp, generated from cSelectFeatures.pyx), never this file. */
#error Do not use this file, it is the result of a failed Cython compilation.
|
3884
bayesclass/cSelectFeatures.cpp
Normal file
3884
bayesclass/cSelectFeatures.cpp
Normal file
File diff suppressed because it is too large
Load Diff
51
bayesclass/cSelectFeatures.pyx
Normal file
51
bayesclass/cSelectFeatures.pyx
Normal file
@@ -0,0 +1,51 @@
|
||||
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string


cdef extern from "FeatureTest.h" namespace "featuresTest":
    ctypedef float precision_t
    cdef cppclass SelectKBest:
        SelectKBest(vector[int]&) except +
        void fit()
        string version()


cdef class CSelectKBest:
    """Thin Python wrapper around featuresTest::SelectKBest."""
    cdef SelectKBest *thisptr
    # fix: SelectKBest stores a vector[int]& — passing the temporary created
    # by the implicit list->vector conversion left a dangling reference.
    # Keep the converted vector alive as an attribute of the wrapper.
    cdef vector[int] _samples

    def __cinit__(self, X):
        self._samples = X
        self.thisptr = new SelectKBest(self._samples)

    def __dealloc__(self):
        # fix: guard against a NULL pointer if __cinit__ raised mid-way.
        if self.thisptr != NULL:
            del self.thisptr

    def fit(self):
        self.thisptr.fit()
        return self

    def get_version(self):
        return self.thisptr.version()

    def __reduce__(self):
        # fix: __cinit__ requires one argument, so the old `(CSelectKBest, ())`
        # made every unpickle raise TypeError; rebuild from the stored samples.
        return (CSelectKBest, (list(self._samples),))


# cdef extern from "FeatureSelect.h" namespace "features":
#     ctypedef float precision_t
#     cdef cppclass SelectKBestWeighted:
#         SelectKBestWeighted(vector[int]&) except +
#         # SelectKBestWeighted(vector[int]&, vector[int]&, vector[precision_t]&, int) except +
#         void fit()
#         string version()
#         vector[precision_t] getScore()

# cdef class CSelectKBestWeighted:
#     cdef SelectKBestWeighted *thisptr
#     def __cinit__(self, X, y, weights, k):
#         # self.thisptr = new SelectKBestWeighted(X, y, weights, k)
#         self.thisptr = new SelectKBestWeighted(X)
#     def __dealloc__(self):
#         del self.thisptr
#     def fit(self,):
#         self.thisptr.fit()
#         return self
#     def get_score(self):
#         return self.thisptr.getScore()
#     def get_version(self):
#         return self.thisptr.version()
#     def __reduce__(self):
#         return (CSelectKBestWeighted, ())
|
24
bayesclass/chargpt.cpp
Normal file
24
bayesclass/chargpt.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#include <cstddef>
#include <map>
#include <vector>

// Declared here, defined elsewhere in the project: weighted entropy of a
// class vector. TODO(review): confirm which translation unit defines it.
double calculateEntropy(std::vector<int>& classVec, std::vector<double>& weightsVec);

// Weighted conditional entropy H(class | feature): group the samples by
// feature value, compute the entropy of each group's class labels, and
// average the group entropies weighted by each group's total weight.
double conditionalEntropy(std::vector<int>& classVec, std::vector<int>& featureVec, std::vector<double>& weightsVec)
{
    std::map<int, std::vector<int>> classesPerFeatureValue;
    std::map<int, std::vector<double>> weightsPerFeatureValue;

    // fix: std::size_t index — the old `int i` compared against size() with
    // a signed/unsigned mismatch.
    for (std::size_t i = 0; i < featureVec.size(); i++) {
        classesPerFeatureValue[featureVec[i]].push_back(classVec[i]);
        weightsPerFeatureValue[featureVec[i]].push_back(weightsVec[i]);
    }

    double totalEntropy = 0;
    double totalWeight = 0;
    for (auto& pair : classesPerFeatureValue) {
        double featureValueEntropy = calculateEntropy(pair.second, weightsPerFeatureValue[pair.first]);
        double featureValueWeight = 0;
        for (double weight : weightsPerFeatureValue[pair.first]) {
            featureValueWeight += weight;
        }
        totalEntropy += featureValueWeight * featureValueEntropy;
        totalWeight += featureValueWeight;
    }

    // fix: empty input (or all-zero weights) previously divided by zero.
    return totalWeight == 0 ? 0.0 : totalEntropy / totalWeight;
}
|
48
bayesclass/copilot.cpp
Normal file
48
bayesclass/copilot.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
#include <algorithm>
#include <cmath>
#include <unordered_map>
#include <vector>
|
||||
|
||||
using namespace std;

using value_t = int;
using precision_t = double;
using sample_t = vector<value_t>;
using score_t = vector<precision_t>;

// Weighted Shannon entropy of `data`: sample i contributes weights[i] to the
// count of its value. Values in `data` must be non-negative small ints, since
// they index the counts vector. Zero-mass bins are skipped so log2(0) never
// poisons the result.
precision_t entropy(const sample_t& data, const vector<precision_t>& weights)
{
    // fix: max_element on an empty range is UB — an empty sample carries
    // no information.
    if (data.empty())
        return 0;
    precision_t ventropy = 0, totalWeight = 0;
    score_t counts(*max_element(data.begin(), data.end()) + 1, 0);
    // fix: size_t index — the old `auto i = 0` was an int compared against size().
    for (size_t i = 0; i < data.size(); ++i) {
        counts[data[i]] += weights[i];
        totalWeight += weights[i];
    }
    for (auto count : counts) {
        if (count != 0) {  // fix: clearer than the old (count > 0 || count < 0)
            const precision_t p = count / totalWeight;
            ventropy -= p * log2(p);
        }
    }
    return ventropy;
}
|
||||
|
||||
// Weighted conditional entropy H(Y|X) = sum_x p(x) * H(Y | X = x).
// fix: the previous body passed an unordered_map to entropy(), which does not
// compile (entropy takes const sample_t&), and the formula it attempted was
// wrong; compute the per-group entropies directly from the joint counts.
// (Signature types unchanged: precision_t = double, sample_t = vector<int>.)
double conditionalEntropy(const std::vector<int>& feature, const std::vector<int>& labels, const std::vector<double>& weights)
{
    std::unordered_map<int, double> featureCounts;
    std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
    double totalWeight = 0.0;
    for (std::size_t i = 0; i < feature.size(); i++) {
        featureCounts[feature[i]] += weights[i];
        jointCounts[feature[i]][labels[i]] += weights[i];
        totalWeight += weights[i];
    }
    if (totalWeight == 0.0)
        return 0.0;  // no mass: entropy of an empty distribution is 0
    double result = 0;
    for (const auto& [f, fWeight] : featureCounts) {
        // H(Y | X = f): label weights renormalised within the f-group.
        double groupEntropy = 0;
        for (const auto& [label, jointWeight] : jointCounts[f]) {
            const double p = jointWeight / fWeight;
            if (p > 0)
                groupEntropy -= p * std::log2(p);
        }
        result += (fWeight / totalWeight) * groupEntropy;
    }
    return result;
}
|
@@ -1,5 +1,5 @@
|
||||
[build-system]
|
||||
requires = ["setuptools", "setuptools-scm", "wheel"]
|
||||
requires = ["setuptools", "setuptools-scm", "cython", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
|
24
setup.py
Normal file
24
setup.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Calling
|
||||
$python setup.py build_ext --inplace
|
||||
will build the extension library in the current file.
|
||||
"""
|
||||
|
||||
from setuptools import Extension, setup
|
||||
|
||||
setup(
|
||||
ext_modules=[
|
||||
Extension(
|
||||
name="bayesclass.cppSelectFeatures",
|
||||
sources=[
|
||||
"bayesclass/cSelectFeatures.pyx",
|
||||
"bayesclass/FeatureSelect.cpp",
|
||||
],
|
||||
language="c++",
|
||||
include_dirs=["bayesclass"],
|
||||
extra_compile_args=[
|
||||
"-std=c++17",
|
||||
],
|
||||
),
|
||||
]
|
||||
)
|
Reference in New Issue
Block a user