Finish cppSelectFeatures

This commit is contained in:
2023-06-23 20:07:26 +02:00
parent d7425e5af0
commit 9d7e787f6c
11 changed files with 405 additions and 328 deletions

View File

@@ -1,7 +0,0 @@
cmake_minimum_required(VERSION 3.20)
project(feature)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
add_executable(feature FeatureSelect.cpp)

View File

@@ -1,5 +1,4 @@
#include "FeatureSelect.h"
#include <iostream>
namespace features {
SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k, bool nat)
: samples(samples), labels(labels), weights(weights), k(k), nat(nat)
@@ -22,59 +21,46 @@ namespace features {
auto labelsCopy = labels;
numFeatures = samples[0].size();
numSamples = samples.size();
// compute number of classes
sort(labelsCopy.begin(), labelsCopy.end());
auto last = unique(labelsCopy.begin(), labelsCopy.end());
labelsCopy.erase(last, labelsCopy.end());
numClasses = labelsCopy.size();
score.reserve(numFeatures);
// compute scores
scores.reserve(numFeatures);
for (int i = 0; i < numFeatures; ++i) {
score.push_back(MutualInformation(i));
scores.push_back(MutualInformation(i));
features.push_back(i);
}
outputValues();
// sort & reduce scores and features
sort(features.begin(), features.end(), [&](int i, int j)
{ return scores[i] > scores[j]; });
sort(scores.begin(), scores.end(), greater<precision_t>());
features.resize(k);
scores.resize(k);
fitted = true;
}
// Debug helper: dumps the selector's current state (dimensions, k, sample
// weights, labels and the full sample matrix) to stdout. Output is
// unconditional, so this is for troubleshooting only — calling it from fit()
// prints the entire dataset on every fit.
void SelectKBestWeighted::outputValues()
{
cout << "numFeatures: " << numFeatures << endl;
cout << "numClasses: " << numClasses << endl;
cout << "numSamples: " << numSamples << endl;
cout << "k: " << k << endl;
// One weight per sample, terminated by an "end." sentinel.
cout << "weights: ";
for (auto item : weights)
cout << item << ", ";
cout << "end." << endl;
cout << "labels: ";
for (auto item : labels)
cout << item << ", ";
cout << "end." << endl;
// Each sample row is printed on its own line; note the range-for copies
// each row by value — acceptable here since this is debug-only code.
cout << "samples: " << endl;
for (auto item : samples) {
for (auto item2 : item)
cout << item2 << ", ";
cout << "end." << endl;
}
cout << "end." << endl;
}
// Weighted entropy of the class-label distribution, H(Y): delegates to
// entropy(), which accumulates per-class sample weights (see entropy() —
// it reads weights[i] for every element of the vector it is given).
precision_t SelectKBestWeighted::entropyLabel()
{
return entropy(labels);
}
precision_t SelectKBestWeighted::entropy(const sample_t& data)
{
precision_t p;
precision_t ventropy = 0, totalWeight = 0;
score_t counts(numClasses + 1, 0);
for (auto i = 0; i < data.size(); ++i) {
for (auto i = 0; i < static_cast<int>(data.size()); ++i) {
counts[data[i]] += weights[i];
totalWeight += weights[i];
}
for (auto count : counts) {
p = count / totalWeight;
if (p > 0)
if (nat)
precision_t p = count / totalWeight;
if (p > 0) {
if (nat) {
ventropy -= p * log(p);
else
} else {
ventropy -= p * log2(p);
}
}
}
return ventropy;
}
@@ -100,10 +86,11 @@ namespace features {
for (auto& [label, jointCount] : jointCounts[feat]) {
auto p_l_f = jointCount / count;
if (p_l_f > 0) {
if (nat)
if (nat) {
entropy_f -= p_l_f * log(p_l_f);
else
} else {
entropy_f -= p_l_f * log2(p_l_f);
}
}
}
entropy += p_f * entropy_f;
@@ -115,27 +102,17 @@ namespace features {
{
return entropyLabel() - conditionalEntropy(i);
}
score_t SelectKBestWeighted::getScore() const
score_t SelectKBestWeighted::getScores() const
{
if (!fitted)
throw logic_error("score not fitted");
return score;
return scores;
}
//Return the indices of the selected features
labels_t SelectKBestWeighted::getFeatures() const
{
if (!fitted)
throw logic_error("score not fitted");
return features;
}
}
// using namespace std;
// int main()
// {
// vector<vector<int>> samples = { {1, 2, 3}, {4, 5, 6}, {7, 8, 9} };
// vector<int> labels = { 1, 2, 1 };
// vector<float> weights = { 0.1, 0.7, 0.2 };
// int k = 3;
// auto metric = features::SelectKBestWeighted(samples, labels, weights, k);
// metric.fit();
// cout << "score: ";
// for (auto item : metric.getScore())
// cout << item << ", ";
// cout << "end." << endl;
// return 0;
// }

View File

@@ -21,16 +21,17 @@ namespace features {
bool nat; // use natural log or log2
int numFeatures, numClasses, numSamples;
bool fitted;
score_t score;
score_t scores; // scores of the features
labels_t features; // indices of the selected features
precision_t entropyLabel();
precision_t entropy(const sample_t&);
precision_t conditionalEntropy(const int);
precision_t MutualInformation(const int);
void outputValues();
public:
SelectKBestWeighted(samples_t&, labels_t&, weights_t&, int, bool);
void fit();
score_t getScore() const;
score_t getScores() const;
labels_t getFeatures() const; //Return the indices of the selected features
static inline string version() { return "0.1.0"; };
};
}

View File

@@ -1 +0,0 @@
#error Do not use this file, it is the result of a failed Cython compilation.

View File

@@ -983,8 +983,8 @@ static const char *__pyx_f[] = {
/*--- Type declarations ---*/
struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted;
/* "bayesclass/cSelectFeatures.pyx":16
* vector[precision_t] getScore()
/* "bayesclass/cSelectFeatures.pyx":17
* vector[int] getFeatures()
*
* cdef class CSelectKBestWeighted: # <<<<<<<<<<<<<<
* cdef SelectKBestWeighted *thisptr
@@ -1251,6 +1251,9 @@ static void __Pyx_CppExn2PyErr() {
/* CIntFromPy.proto */
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
/* CIntToPy.proto */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
/* CIntFromPy.proto */
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *);
@@ -1294,6 +1297,7 @@ static std::vector<int> __pyx_convert_vector_from_py_int(PyObject *); /*proto*/
static std::vector<std::vector<int> > __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(PyObject *); /*proto*/
static std::vector<features::precision_t> __pyx_convert_vector_from_py_features_3a__3a_precision_t(PyObject *); /*proto*/
static PyObject *__pyx_convert_vector_to_py_features_3a__3a_precision_t(const std::vector<features::precision_t> &); /*proto*/
static PyObject *__pyx_convert_vector_to_py_int(const std::vector<int> &); /*proto*/
static CYTHON_INLINE PyObject *__pyx_convert_PyObject_string_to_py_std__in_string(std::string const &); /*proto*/
static CYTHON_INLINE PyObject *__pyx_convert_PyUnicode_string_to_py_std__in_string(std::string const &); /*proto*/
static CYTHON_INLINE PyObject *__pyx_convert_PyStr_string_to_py_std__in_string(std::string const &); /*proto*/
@@ -1330,13 +1334,14 @@ static PyObject *__pyx_n_s_y;
static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___cinit__(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self, PyObject *__pyx_v_X, PyObject *__pyx_v_y, PyObject *__pyx_v_weights, PyObject *__pyx_v_k, PyObject *__pyx_v_natural); /* proto */
static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__dealloc__(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_4fit(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_score(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_scores(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_features(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_12__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_tp_new_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
/* Late includes */
/* "bayesclass/cSelectFeatures.pyx":18
/* "bayesclass/cSelectFeatures.pyx":19
* cdef class CSelectKBestWeighted:
* cdef SelectKBestWeighted *thisptr
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 # <<<<<<<<<<<<<<
@@ -1388,19 +1393,19 @@ static int __pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_1__c
case 1:
if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_y)) != 0)) kw_args--;
else {
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 1); __PYX_ERR(0, 18, __pyx_L3_error)
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 1); __PYX_ERR(0, 19, __pyx_L3_error)
}
CYTHON_FALLTHROUGH;
case 2:
if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--;
else {
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 2); __PYX_ERR(0, 18, __pyx_L3_error)
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 2); __PYX_ERR(0, 19, __pyx_L3_error)
}
CYTHON_FALLTHROUGH;
case 3:
if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_k)) != 0)) kw_args--;
else {
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 3); __PYX_ERR(0, 18, __pyx_L3_error)
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 3); __PYX_ERR(0, 19, __pyx_L3_error)
}
CYTHON_FALLTHROUGH;
case 4:
@@ -1410,7 +1415,7 @@ static int __pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_1__c
}
}
if (unlikely(kw_args > 0)) {
if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(0, 18, __pyx_L3_error)
if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(0, 19, __pyx_L3_error)
}
} else {
switch (PyTuple_GET_SIZE(__pyx_args)) {
@@ -1432,7 +1437,7 @@ static int __pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_1__c
}
goto __pyx_L4_argument_unpacking_done;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 18, __pyx_L3_error)
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 19, __pyx_L3_error)
__pyx_L3_error:;
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
@@ -1459,27 +1464,27 @@ static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___ci
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__cinit__", 0);
/* "bayesclass/cSelectFeatures.pyx":19
/* "bayesclass/cSelectFeatures.pyx":20
* cdef SelectKBestWeighted *thisptr
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural) # <<<<<<<<<<<<<<
* def __dealloc__(self):
* del self.thisptr
*/
__pyx_t_1 = __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(__pyx_v_X); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error)
__pyx_t_2 = __pyx_convert_vector_from_py_int(__pyx_v_y); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error)
__pyx_t_3 = __pyx_convert_vector_from_py_features_3a__3a_precision_t(__pyx_v_weights); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error)
__pyx_t_4 = __Pyx_PyInt_As_int(__pyx_v_k); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error)
__pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_natural); if (unlikely((__pyx_t_5 == ((bool)-1)) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error)
__pyx_t_1 = __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(__pyx_v_X); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_2 = __pyx_convert_vector_from_py_int(__pyx_v_y); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_3 = __pyx_convert_vector_from_py_features_3a__3a_precision_t(__pyx_v_weights); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_4 = __Pyx_PyInt_As_int(__pyx_v_k); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_natural); if (unlikely((__pyx_t_5 == ((bool)-1)) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
try {
__pyx_t_6 = new features::SelectKBestWeighted(__pyx_t_1, __pyx_t_2, __pyx_t_3, __pyx_t_4, __pyx_t_5);
} catch(...) {
__Pyx_CppExn2PyErr();
__PYX_ERR(0, 19, __pyx_L1_error)
__PYX_ERR(0, 20, __pyx_L1_error)
}
__pyx_v_self->thisptr = __pyx_t_6;
/* "bayesclass/cSelectFeatures.pyx":18
/* "bayesclass/cSelectFeatures.pyx":19
* cdef class CSelectKBestWeighted:
* cdef SelectKBestWeighted *thisptr
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 # <<<<<<<<<<<<<<
@@ -1498,7 +1503,7 @@ static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___ci
return __pyx_r;
}
/* "bayesclass/cSelectFeatures.pyx":20
/* "bayesclass/cSelectFeatures.pyx":21
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
* def __dealloc__(self): # <<<<<<<<<<<<<<
@@ -1521,7 +1526,7 @@ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__dealloc__", 0);
/* "bayesclass/cSelectFeatures.pyx":21
/* "bayesclass/cSelectFeatures.pyx":22
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
* def __dealloc__(self):
* del self.thisptr # <<<<<<<<<<<<<<
@@ -1530,7 +1535,7 @@ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__
*/
delete __pyx_v_self->thisptr;
/* "bayesclass/cSelectFeatures.pyx":20
/* "bayesclass/cSelectFeatures.pyx":21
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
* def __dealloc__(self): # <<<<<<<<<<<<<<
@@ -1542,7 +1547,7 @@ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__
__Pyx_RefNannyFinishContext();
}
/* "bayesclass/cSelectFeatures.pyx":22
/* "bayesclass/cSelectFeatures.pyx":23
* def __dealloc__(self):
* del self.thisptr
* def fit(self,): # <<<<<<<<<<<<<<
@@ -1568,28 +1573,28 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("fit", 0);
/* "bayesclass/cSelectFeatures.pyx":23
/* "bayesclass/cSelectFeatures.pyx":24
* del self.thisptr
* def fit(self,):
* self.thisptr.fit() # <<<<<<<<<<<<<<
* return self
* def get_score(self):
* def get_scores(self):
*/
__pyx_v_self->thisptr->fit();
/* "bayesclass/cSelectFeatures.pyx":24
/* "bayesclass/cSelectFeatures.pyx":25
* def fit(self,):
* self.thisptr.fit()
* return self # <<<<<<<<<<<<<<
* def get_score(self):
* return self.thisptr.getScore()
* def get_scores(self):
* return self.thisptr.getScores()
*/
__Pyx_XDECREF(__pyx_r);
__Pyx_INCREF(((PyObject *)__pyx_v_self));
__pyx_r = ((PyObject *)__pyx_v_self);
goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":22
/* "bayesclass/cSelectFeatures.pyx":23
* def __dealloc__(self):
* del self.thisptr
* def fit(self,): # <<<<<<<<<<<<<<
@@ -1604,62 +1609,62 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
return __pyx_r;
}
/* "bayesclass/cSelectFeatures.pyx":25
/* "bayesclass/cSelectFeatures.pyx":26
* self.thisptr.fit()
* return self
* def get_score(self): # <<<<<<<<<<<<<<
* return self.thisptr.getScore()
* def get_version(self):
* def get_scores(self): # <<<<<<<<<<<<<<
* return self.thisptr.getScores()
* def get_features(self):
*/
/* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_score(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_score(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_scores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_scores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("get_score (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_score(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
__Pyx_RefNannySetupContext("get_scores (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_scores(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_score(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_scores(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("get_score", 0);
__Pyx_RefNannySetupContext("get_scores", 0);
/* "bayesclass/cSelectFeatures.pyx":26
/* "bayesclass/cSelectFeatures.pyx":27
* return self
* def get_score(self):
* return self.thisptr.getScore() # <<<<<<<<<<<<<<
* def get_version(self):
* return self.thisptr.version()
* def get_scores(self):
* return self.thisptr.getScores() # <<<<<<<<<<<<<<
* def get_features(self):
* return self.thisptr.getFeatures()
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_convert_vector_to_py_features_3a__3a_precision_t(__pyx_v_self->thisptr->getScore()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
__pyx_t_1 = __pyx_convert_vector_to_py_features_3a__3a_precision_t(__pyx_v_self->thisptr->getScores()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 27, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":25
/* "bayesclass/cSelectFeatures.pyx":26
* self.thisptr.fit()
* return self
* def get_score(self): # <<<<<<<<<<<<<<
* return self.thisptr.getScore()
* def get_version(self):
* def get_scores(self): # <<<<<<<<<<<<<<
* return self.thisptr.getScores()
* def get_features(self):
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.get_score", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.get_scores", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
@@ -1667,28 +1672,91 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
return __pyx_r;
}
/* "bayesclass/cSelectFeatures.pyx":27
* def get_score(self):
* return self.thisptr.getScore()
* def get_version(self): # <<<<<<<<<<<<<<
* return self.thisptr.version()
* def __reduce__(self):
/* "bayesclass/cSelectFeatures.pyx":28
* def get_scores(self):
* return self.thisptr.getScores()
* def get_features(self): # <<<<<<<<<<<<<<
* return self.thisptr.getFeatures()
* def get_version(self):
*/
/* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_features(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_features(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("get_version (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_version(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
__Pyx_RefNannySetupContext("get_features (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_features(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_features(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("get_features", 0);
/* "bayesclass/cSelectFeatures.pyx":29
* return self.thisptr.getScores()
* def get_features(self):
* return self.thisptr.getFeatures() # <<<<<<<<<<<<<<
* def get_version(self):
* return self.thisptr.version()
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_convert_vector_to_py_int(__pyx_v_self->thisptr->getFeatures()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":28
* def get_scores(self):
* return self.thisptr.getScores()
* def get_features(self): # <<<<<<<<<<<<<<
* return self.thisptr.getFeatures()
* def get_version(self):
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.get_features", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "bayesclass/cSelectFeatures.pyx":30
* def get_features(self):
* return self.thisptr.getFeatures()
* def get_version(self): # <<<<<<<<<<<<<<
* return self.thisptr.version()
* def __reduce__(self):
*/
/* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("get_version (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10get_version(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
@@ -1697,23 +1765,23 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("get_version", 0);
/* "bayesclass/cSelectFeatures.pyx":28
* return self.thisptr.getScore()
/* "bayesclass/cSelectFeatures.pyx":31
* return self.thisptr.getFeatures()
* def get_version(self):
* return self.thisptr.version() # <<<<<<<<<<<<<<
* def __reduce__(self):
* return (CSelectKBestWeighted, ())
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_convert_PyBytes_string_to_py_std__in_string(__pyx_v_self->thisptr->version()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error)
__pyx_t_1 = __pyx_convert_PyBytes_string_to_py_std__in_string(__pyx_v_self->thisptr->version()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":27
* def get_score(self):
* return self.thisptr.getScore()
/* "bayesclass/cSelectFeatures.pyx":30
* def get_features(self):
* return self.thisptr.getFeatures()
* def get_version(self): # <<<<<<<<<<<<<<
* return self.thisptr.version()
* def __reduce__(self):
@@ -1730,7 +1798,7 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
return __pyx_r;
}
/* "bayesclass/cSelectFeatures.pyx":29
/* "bayesclass/cSelectFeatures.pyx":32
* def get_version(self):
* return self.thisptr.version()
* def __reduce__(self): # <<<<<<<<<<<<<<
@@ -1738,19 +1806,19 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
*/
/* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_13__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_13__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10__reduce__(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_12__reduce__(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_12__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
@@ -1759,13 +1827,13 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__reduce__", 0);
/* "bayesclass/cSelectFeatures.pyx":30
/* "bayesclass/cSelectFeatures.pyx":33
* return self.thisptr.version()
* def __reduce__(self):
* return (CSelectKBestWeighted, ()) # <<<<<<<<<<<<<<
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error)
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__Pyx_INCREF(((PyObject *)__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted));
__Pyx_GIVEREF(((PyObject *)__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted));
@@ -1777,7 +1845,7 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
__pyx_t_1 = 0;
goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":29
/* "bayesclass/cSelectFeatures.pyx":32
* def get_version(self):
* return self.thisptr.version()
* def __reduce__(self): # <<<<<<<<<<<<<<
@@ -2102,7 +2170,7 @@ static std::vector<features::precision_t> __pyx_convert_vector_from_py_features
* return v
*
*/
__pyx_t_5 = __pyx_PyFloat_AsDouble(__pyx_v_item); if (unlikely((__pyx_t_5 == ((features::precision_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L1_error)
__pyx_t_5 = __pyx_PyFloat_AsFloat(__pyx_v_item); if (unlikely((__pyx_t_5 == ((features::precision_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L1_error)
__pyx_v_v.push_back(((features::precision_t)__pyx_t_5));
/* "vector.from_py":47
@@ -2210,6 +2278,63 @@ static PyObject *__pyx_convert_vector_to_py_features_3a__3a_precision_t(const st
return __pyx_r;
}
/* Cython-generated conversion helper: builds a new Python list of ints from a
 * C++ std::vector<int> (added here to return getFeatures() results to Python).
 * NOTE(review): this file is generated output of cythonize — change the .pyx
 * source and regenerate instead of editing this function by hand. */
static PyObject *__pyx_convert_vector_to_py_int(const std::vector<int> &__pyx_v_v) {
size_t __pyx_v_i;
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
size_t __pyx_t_2;
size_t __pyx_t_3;
size_t __pyx_t_4;
PyObject *__pyx_t_5 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__pyx_convert_vector_to_py_int", 0);
/* "vector.to_py":61
 * @cname("__pyx_convert_vector_to_py_int")
 * cdef object __pyx_convert_vector_to_py_int(vector[X]& v):
 * return [v[i] for i in range(v.size())] # <<<<<<<<<<<<<<
 *
 *
 */
__Pyx_XDECREF(__pyx_r);
/* Allocate the result list, then append one PyInt/PyLong per element. */
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 61, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __pyx_v_v.size();
__pyx_t_3 = __pyx_t_2;
for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
__pyx_v_i = __pyx_t_4;
__pyx_t_5 = __Pyx_PyInt_From_int((__pyx_v_v[__pyx_v_i])); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 61, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_5);
/* ListComp_Append steals no reference on failure, so the DECREF below
 * runs only on the success path; errors jump to __pyx_L1_error. */
if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_5))) __PYX_ERR(1, 61, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
}
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "vector.to_py":60
 *
 * @cname("__pyx_convert_vector_to_py_int")
 * cdef object __pyx_convert_vector_to_py_int(vector[X]& v): # <<<<<<<<<<<<<<
 * return [v[i] for i in range(v.size())]
 *
 */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_5);
__Pyx_AddTraceback("vector.to_py.__pyx_convert_vector_to_py_int", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "string.to_py":31
*
* @cname("__pyx_convert_PyObject_string_to_py_std__in_string")
@@ -2493,9 +2618,10 @@ static void __pyx_tp_dealloc_10bayesclass_17cppSelectFeatures_CSelectKBestWeight
static PyMethodDef __pyx_methods_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted[] = {
{"fit", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_5fit, METH_NOARGS, 0},
{"get_score", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_score, METH_NOARGS, 0},
{"get_version", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_version, METH_NOARGS, 0},
{"__reduce__", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11__reduce__, METH_NOARGS, 0},
{"get_scores", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_scores, METH_NOARGS, 0},
{"get_features", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_features, METH_NOARGS, 0},
{"get_version", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11get_version, METH_NOARGS, 0},
{"__reduce__", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_13__reduce__, METH_NOARGS, 0},
{0, 0, 0, 0}
};
@@ -2690,14 +2816,14 @@ static int __Pyx_modinit_type_init_code(void) {
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
/*--- Type init code ---*/
if (PyType_Ready(&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 16, __pyx_L1_error)
if (PyType_Ready(&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 17, __pyx_L1_error)
#if PY_VERSION_HEX < 0x030800B1
__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_print = 0;
#endif
if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_dictoffset && __pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_getattro == PyObject_GenericGetAttr)) {
__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_getattro = __Pyx_PyObject_GenericGetAttr;
}
if (PyObject_SetAttr(__pyx_m, __pyx_n_s_CSelectKBestWeighted, (PyObject *)&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 16, __pyx_L1_error)
if (PyObject_SetAttr(__pyx_m, __pyx_n_s_CSelectKBestWeighted, (PyObject *)&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 17, __pyx_L1_error)
__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted = &__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted;
__Pyx_RefNannyFinishContext();
return 0;
@@ -3707,6 +3833,44 @@ raise_neg_overflow:
return (int) -1;
}
/* CIntToPy */
/* Convert a C `int` to a Python integer object.
 * Cython-generated helper: selects the narrowest suitable CPython
 * constructor based on the width and signedness of `int` on the build
 * target, falling back to a raw byte-array conversion when no
 * fixed-width constructor fits. */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
/* Compile-time signedness probe: (int)-1 > 0 only if `int` is unsigned. */
const int neg_one = (int) -1, const_zero = (int) 0;
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
const int is_unsigned = neg_one > const_zero;
if (is_unsigned) {
/* Unsigned int: pick the smallest CPython ctor that cannot truncate. */
if (sizeof(int) < sizeof(long)) {
return PyInt_FromLong((long) value);
} else if (sizeof(int) <= sizeof(unsigned long)) {
return PyLong_FromUnsignedLong((unsigned long) value);
#ifdef HAVE_LONG_LONG
} else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
#endif
}
} else {
/* Signed int: same widening strategy through the signed ctors. */
if (sizeof(int) <= sizeof(long)) {
return PyInt_FromLong((long) value);
#ifdef HAVE_LONG_LONG
} else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
return PyLong_FromLongLong((PY_LONG_LONG) value);
#endif
}
}
{
/* Fallback for exotic int sizes: hand the raw bytes to CPython.
 * `little` is a runtime endianness probe (reads the low byte of 1). */
int one = 1; int little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&value;
return _PyLong_FromByteArray(bytes, sizeof(int),
little, !is_unsigned);
}
}
/* CIntFromPy */
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC

View File

@@ -6,12 +6,13 @@ from libcpp cimport bool
cdef extern from "FeatureSelect.h" namespace "features":
    # NOTE(review): precision_t was declared twice (double, then float)
    # by a merged diff; only the later float declaration is kept —
    # confirm it matches the typedef in FeatureSelect.h.
    ctypedef float precision_t
    cdef cppclass SelectKBestWeighted:
        # Constructor mirrors the C++ signature: samples, labels,
        # per-sample weights, k (features to keep), natural-log flag.
        SelectKBestWeighted(vector[vector[int]]&, vector[int]&, vector[precision_t]&, int, bool) except +
        void fit()
        string version()
        vector[precision_t] getScore()
        vector[precision_t] getScores()
        vector[int] getFeatures()
cdef class CSelectKBestWeighted:
cdef SelectKBestWeighted *thisptr
@@ -22,8 +23,10 @@ cdef class CSelectKBestWeighted:
def fit(self,):
    # Run the C++ selector, then return self so calls can be chained
    # (e.g. obj.fit().get_scores()).
    self.thisptr.fit()
    return self
def get_score(self):
    # NOTE(review): older accessor; newer code appears to use
    # get_scores() — confirm whether this alias is still needed.
    return self.thisptr.getScore()
def get_scores(self):
    # Per-feature scores computed by fit(), as a Python list.
    return self.thisptr.getScores()
def get_features(self):
    # Indices of the selected features, as computed by fit().
    return self.thisptr.getFeatures()
def get_version(self):
    # Version string reported by the underlying C++ implementation.
    return self.thisptr.version()
def __reduce__(self):

View File

@@ -1,24 +0,0 @@
// Weighted conditional entropy H(class | feature).
// Groups samples by feature value, computes the weighted entropy of the
// class labels within each group (via the external calculateEntropy
// helper), and returns the weight-averaged result.
// Fixes: size_t loop index (was a signed/unsigned comparison) and a
// guard against 0/0 -> NaN when the input is empty or all weights are 0.
double conditionalEntropy(std::vector<int>& classVec, std::vector<int>& featureVec, std::vector<double>& weightsVec)
{
    std::map<int, std::vector<int>> classesPerFeatureValue;
    std::map<int, std::vector<double>> weightsPerFeatureValue;
    for (std::size_t i = 0; i < featureVec.size(); i++) {
        classesPerFeatureValue[featureVec[i]].push_back(classVec[i]);
        weightsPerFeatureValue[featureVec[i]].push_back(weightsVec[i]);
    }
    double totalEntropy = 0;
    double totalWeight = 0;
    for (auto& pair : classesPerFeatureValue) {
        // Entropy of the labels restricted to this feature value.
        double featureValueEntropy = calculateEntropy(pair.second, weightsPerFeatureValue[pair.first]);
        double featureValueWeight = 0;
        for (double weight : weightsPerFeatureValue[pair.first]) {
            featureValueWeight += weight;
        }
        totalEntropy += featureValueWeight * featureValueEntropy;
        totalWeight += featureValueWeight;
    }
    if (totalWeight == 0) {
        return 0; // empty input or all-zero weights: avoid 0/0 -> NaN
    }
    return totalEntropy / totalWeight;
}

View File

@@ -15,7 +15,7 @@ from pgmpy.models import BayesianNetwork
from pgmpy.base import DAG
import matplotlib.pyplot as plt
from fimdlp.mdlp import FImdlp
from .feature_selection import SelectKBestWeighted
from .cppSelectFeatures import CSelectKBestWeighted
from ._version import __version__
@@ -869,15 +869,39 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
self.nodes_leaves = self.nodes_edges
return self
def mutual_info_classif_weighted(X, y, sample_weight):
# Compute the mutual information between each feature and the target
mi = mutual_info_classif(X, y)
def version(self):
    """Return the version string of the underlying estimator.

    Uses the fitted estimator when available; otherwise asks a
    throwaway SPODE instance for its version.
    """
    if hasattr(self, "fitted_"):
        return self.estimator_.version()
    return SPODE(None, False).version()
# Multiply the mutual information scores with the sample weights
mi_weighted = mi * sample_weight
@property
def states_(self):
    # Average number of discrete states per estimator across the
    # ensemble (sum of state-list lengths over every model's variables,
    # divided by the number of estimators). Returns 0 before fitting.
    if hasattr(self, "fitted_"):
        return sum(
            [
                len(item)
                for model in self.estimators_
                for _, item in model.model_.states.items()
            ]
        ) / len(self.estimators_)
    return 0
# Return the weighted mutual information scores
return mi_weighted
@property
def depth_(self):
    # Depth is reported as the states_ average; no separate notion of
    # depth exists for this ensemble.
    return self.states_
def nodes_edges(self):
    # Total node and edge counts summed over all fitted estimators'
    # DAGs; returns (0, 0) when the ensemble has not been fitted.
    nodes = 0
    edges = 0
    if hasattr(self, "fitted_"):
        nodes = sum([len(x.dag_) for x in self.estimators_])
        edges = sum([len(x.dag_.edges()) for x in self.estimators_])
    return nodes, edges
def plot(self, title=""):
    # Plot each estimator's network, prefixing the given title with the
    # estimator index. UserWarnings from the plotting backend are
    # suppressed to keep the output clean.
    warnings.simplefilter("ignore", UserWarning)
    for idx, model in enumerate(self.estimators_):
        model.plot(title=f"{idx} {title}")
def _train(self, kwargs):
"""Build boosted SPODEs"""
@@ -885,14 +909,12 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
# Step 0: Set the finish condition
for num in range(self.n_estimators):
# Step 1: Build ranking with mutual information
# OJO MAL, ESTO NO ACTUALIZA EL RANKING CON LOS PESOS
# SIEMPRE VA A SACAR LO MISMO
feature = (
SelectKBestWeighted(k=1)
.fit(self.X_, self.y_, weights)
.get_feature_names_out(self.feature_names_in_)
.tolist()
n_feature = (
CSelectKBestWeighted(self.X_, self.y_, weights, k=1)
.fit()
.get_features()[0]
)
feature = self.feature_names_in_[n_feature]
# Step 2: Build & train spode with the first feature as sparent
estimator = clone(self.estimator_)
_args = kwargs.copy()
@@ -910,8 +932,8 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
# Step 3.2: Update weights for next classifier
weights = [
wm * np.exp(am * (ym != y_pred))
for wm, ym in zip(weights, self.y_)
wm * np.exp(am * (ym != yp))
for wm, ym, yp in zip(weights, self.y_, y_pred)
]
# Step 4: Add the new model
self.estimators_.append(estimator)

View File

@@ -1,48 +0,0 @@
#include <unordered_map>
#include <vector>
#include <cmath>
using namespace std;
using value_t = int;
using precision_t = double;
using sample_t = vector<value_t>;
using score_t = vector<precision_t>;

// Weighted Shannon entropy (base 2) of a discrete sample.
// `data` values are used directly as bucket indices, so they must be
// non-negative. `weights` supplies one weight per sample.
// Fixes: guard against *max_element on an empty range (UB), guard the
// totalWeight == 0 division, use size_t for the index loop, and replace
// the obscure `count > 0 || count < 0` test with `count != 0`.
precision_t entropy(const sample_t& data, const vector<precision_t>& weights)
{
    if (data.empty()) {
        return 0; // no samples carry no information (and avoids UB below)
    }
    precision_t ventropy = 0, totalWeight = 0;
    // One accumulator per observed value, indexed by the value itself.
    score_t counts(*max_element(data.begin(), data.end()) + 1, 0);
    for (size_t i = 0; i < data.size(); ++i) {
        counts[data[i]] += weights[i];
        totalWeight += weights[i];
    }
    if (totalWeight == 0) {
        return 0; // degenerate weighting: no probability mass to spread
    }
    for (auto count : counts) {
        if (count != 0) { // skip unobserved values: log2(0) is undefined
            precision_t p = count / totalWeight;
            ventropy -= p * log2(p);
        }
    }
    return ventropy;
}
// Weighted conditional entropy H(labels | feature) in bits:
//   H(L|F) = sum_f p(f) * H(L | F = f)
// with all probabilities estimated from the per-sample weights.
// Fixes: the original passed an unordered_map where ::entropy expects a
// sample_t (vector<int>) — a type mismatch that does not compile — so
// the inner per-feature-value entropy is now computed directly from the
// joint weight counts; also guards the totalWeight == 0 division and
// drops the redundant clear() calls on freshly constructed maps.
precision_t conditionalEntropy(const sample_t& feature, const sample_t& labels, const vector<precision_t>& weights)
{
    unordered_map<value_t, precision_t> featureCounts;
    unordered_map<value_t, unordered_map<value_t, precision_t>> jointCounts;
    precision_t totalWeight = 0.0;
    for (size_t i = 0; i < feature.size(); i++) {
        featureCounts[feature[i]] += weights[i];
        jointCounts[feature[i]][labels[i]] += weights[i];
        totalWeight += weights[i];
    }
    if (totalWeight == 0) {
        return 0; // empty input or all-zero weights: no mass, define H = 0
    }
    precision_t condEntropy = 0;
    for (auto& [f, weight_f] : featureCounts) {
        precision_t p_f = weight_f / totalWeight;
        // Entropy of the label distribution restricted to samples whose
        // feature value is f (weights renormalized within the slice).
        precision_t h_f = 0;
        for (auto& [label, w] : jointCounts[f]) {
            if (w != 0) {
                precision_t p = w / weight_f;
                h_f -= p * log2(p);
            }
        }
        condEntropy += p_f * h_f;
    }
    return condEntropy;
}

View File

@@ -1,93 +1,93 @@
import numpy as np
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils.validation import check_X_y, check_is_fitted
from sklearn.feature_selection._univariate_selection import (
_BaseFilter,
_clean_nans,
)
# import numpy as np
# from sklearn.feature_selection import mutual_info_classif
# from sklearn.utils.validation import check_X_y, check_is_fitted
# from sklearn.feature_selection._univariate_selection import (
# _BaseFilter,
# _clean_nans,
# )
"""
Compute the weighted mutual information between each feature and the
target.
Based on
Silviu Guiaşu,
Weighted entropy,
Reports on Mathematical Physics,
Volume 2, Issue 3,
1971,
Pages 165-179,
ISSN 0034-4877,
https://doi.org/10.1016/0034-4877(71)90002-4.
(https://www.sciencedirect.com/science/article/pii/0034487771900024)
Abstract: Weighted entropy is the measure of information supplied by a
probabilistic experiment whose elementary events are characterized both by their
objective probabilities and by some qualitative (objective or subjective)
weights. The properties, the axiomatics and the maximum value of the weighted
entropy are given.
"""
# """
# Compute the weighted mutual information between each feature and the
# target.
# Based on
# Silviu Guiaşu,
# Weighted entropy,
# Reports on Mathematical Physics,
# Volume 2, Issue 3,
# 1971,
# Pages 165-179,
# ISSN 0034-4877,
# https://doi.org/10.1016/0034-4877(71)90002-4.
# (https://www.sciencedirect.com/science/article/pii/0034487771900024)
# Abstract: Weighted entropy is the measure of information supplied by a
# probabilistic experiment whose elementary events are characterized both by their
# objective probabilities and by some qualitative (objective or subjective)
# weights. The properties, the axiomatics and the maximum value of the weighted
# entropy are given.
# """
class SelectKBestWeighted(_BaseFilter):
def __init__(self, *, k=10):
super().__init__(score_func=mutual_info_classif)
self.k = k
# class SelectKBestWeighted(_BaseFilter):
# def __init__(self, *, k=10):
# super().__init__(score_func=mutual_info_classif)
# self.k = k
def _check_params(self, X, y):
if self.k > X.shape[1] or self.k < 1:
raise ValueError(
f"k must be between 1 and {X.shape[1]} got {self.k}."
)
# def _check_params(self, X, y):
# if self.k > X.shape[1] or self.k < 1:
# raise ValueError(
# f"k must be between 1 and {X.shape[1]} got {self.k}."
# )
def _get_support_mask(self):
check_is_fitted(self)
# def _get_support_mask(self):
# check_is_fitted(self)
if self.k == "all":
return np.ones(self.scores_.shape, dtype=bool)
elif self.k == 0:
return np.zeros(self.scores_.shape, dtype=bool)
else:
scores = _clean_nans(self.scores_)
mask = np.zeros(scores.shape, dtype=bool)
# if self.k == "all":
# return np.ones(self.scores_.shape, dtype=bool)
# elif self.k == 0:
# return np.zeros(self.scores_.shape, dtype=bool)
# else:
# scores = _clean_nans(self.scores_)
# mask = np.zeros(scores.shape, dtype=bool)
# Request a stable sort. Mergesort takes more memory (~40MB per
# megafeature on x86-64).
mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
return mask
# # Request a stable sort. Mergesort takes more memory (~40MB per
# # megafeature on x86-64).
# mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
# return mask
def fit(self, X, y, sample_weight):
self.X_, self.y_ = check_X_y(X, y)
self._check_params(X, y)
self.n_features_in_ = X.shape[1]
self.sample_weight_ = sample_weight
# Compute the entropy of the target variable
entropy_y = -np.sum(
np.multiply(
np.bincount(y, weights=sample_weight),
np.log(np.bincount(y, weights=sample_weight)),
)
)
# def fit(self, X, y, sample_weight):
# self.X_, self.y_ = check_X_y(X, y)
# self._check_params(X, y)
# self.n_features_in_ = X.shape[1]
# self.sample_weight_ = sample_weight
# # Compute the entropy of the target variable
# entropy_y = -np.sum(
# np.multiply(
# np.bincount(y, weights=sample_weight),
# np.log(np.bincount(y, weights=sample_weight)),
# )
# )
# Compute the mutual information between each feature and the target
mi = self.score_func(X, y)
# # Compute the mutual information between each feature and the target
# mi = self.score_func(X, y)
# Compute the weighted entropy of each feature
entropy_weighted = []
for i in range(X.shape[1]):
# Compute the weighted frequency of each unique value of the
# feature
freq_weighted = np.bincount(X[:, i], weights=sample_weight)
freq_weighted = freq_weighted[freq_weighted != 0]
# # Compute the weighted entropy of each feature
# entropy_weighted = []
# for i in range(X.shape[1]):
# # Compute the weighted frequency of each unique value of the
# # feature
# freq_weighted = np.bincount(X[:, i], weights=sample_weight)
# freq_weighted = freq_weighted[freq_weighted != 0]
# Compute the weighted entropy of the feature
entropy_weighted.append(
-np.sum(np.multiply(freq_weighted, np.log(freq_weighted)))
/ np.sum(sample_weight)
)
# # Compute the weighted entropy of the feature
# entropy_weighted.append(
# -np.sum(np.multiply(freq_weighted, np.log(freq_weighted)))
# / np.sum(sample_weight)
# )
# Compute the weighted mutual information between each feature and
# the target
mi_weighted = mi * entropy_weighted / entropy_y
# # Compute the weighted mutual information between each feature and
# # the target
# mi_weighted = mi * entropy_weighted / entropy_y
# Return the weighted mutual information scores
self.scores_ = mi_weighted
return self
# # Return the weighted mutual information scores
# self.scores_ = mi_weighted
# return self

10
test.py
View File

@@ -1,10 +0,0 @@
from bayesclass.cppSelectFeatures import CSelectKBestWeighted

# Smoke test for the C++ weighted SelectKBest wrapper:
# 10 samples x 3 features, decreasing per-sample weights, 10 labels,
# keeping the k=3 best features.
X = [[x for x in range(i, i + 3)] for i in range(1, 30, 3)]
weights = [25 / (i + 1) for i in range(10)]
labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
test = CSelectKBestWeighted(X, labels, weights, 3)
test.fit()
# get_scores() is the current accessor name (the wrapper exposes
# get_scores/get_features; the old get_score spelling is stale).
for item in test.get_scores():
    print(item)