Finish cppSelectFeatures

2023-06-23 20:07:26 +02:00
parent d7425e5af0
commit 9d7e787f6c
11 changed files with 405 additions and 328 deletions

View File

@@ -1,7 +0,0 @@
cmake_minimum_required(VERSION 3.20)
project(feature)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
add_executable(feature FeatureSelect.cpp)

View File

@@ -1,5 +1,4 @@
#include "FeatureSelect.h" #include "FeatureSelect.h"
#include <iostream>
namespace features { namespace features {
SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k, bool nat) SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k, bool nat)
: samples(samples), labels(labels), weights(weights), k(k), nat(nat) : samples(samples), labels(labels), weights(weights), k(k), nat(nat)
@@ -22,59 +21,46 @@ namespace features {
auto labelsCopy = labels; auto labelsCopy = labels;
numFeatures = samples[0].size(); numFeatures = samples[0].size();
numSamples = samples.size(); numSamples = samples.size();
// compute number of classes
sort(labelsCopy.begin(), labelsCopy.end()); sort(labelsCopy.begin(), labelsCopy.end());
auto last = unique(labelsCopy.begin(), labelsCopy.end()); auto last = unique(labelsCopy.begin(), labelsCopy.end());
labelsCopy.erase(last, labelsCopy.end()); labelsCopy.erase(last, labelsCopy.end());
numClasses = labelsCopy.size(); numClasses = labelsCopy.size();
score.reserve(numFeatures); // compute scores
scores.reserve(numFeatures);
for (int i = 0; i < numFeatures; ++i) { for (int i = 0; i < numFeatures; ++i) {
score.push_back(MutualInformation(i)); scores.push_back(MutualInformation(i));
features.push_back(i);
} }
outputValues(); // sort & reduce scores and features
sort(features.begin(), features.end(), [&](int i, int j)
{ return scores[i] > scores[j]; });
sort(scores.begin(), scores.end(), greater<precision_t>());
features.resize(k);
scores.resize(k);
fitted = true; fitted = true;
} }
void SelectKBestWeighted::outputValues()
{
cout << "numFeatures: " << numFeatures << endl;
cout << "numClasses: " << numClasses << endl;
cout << "numSamples: " << numSamples << endl;
cout << "k: " << k << endl;
cout << "weights: ";
for (auto item : weights)
cout << item << ", ";
cout << "end." << endl;
cout << "labels: ";
for (auto item : labels)
cout << item << ", ";
cout << "end." << endl;
cout << "samples: " << endl;
for (auto item : samples) {
for (auto item2 : item)
cout << item2 << ", ";
cout << "end." << endl;
}
cout << "end." << endl;
}
precision_t SelectKBestWeighted::entropyLabel() precision_t SelectKBestWeighted::entropyLabel()
{ {
return entropy(labels); return entropy(labels);
} }
precision_t SelectKBestWeighted::entropy(const sample_t& data) precision_t SelectKBestWeighted::entropy(const sample_t& data)
{ {
precision_t p;
precision_t ventropy = 0, totalWeight = 0; precision_t ventropy = 0, totalWeight = 0;
score_t counts(numClasses + 1, 0); score_t counts(numClasses + 1, 0);
for (auto i = 0; i < data.size(); ++i) { for (auto i = 0; i < static_cast<int>(data.size()); ++i) {
counts[data[i]] += weights[i]; counts[data[i]] += weights[i];
totalWeight += weights[i]; totalWeight += weights[i];
} }
for (auto count : counts) { for (auto count : counts) {
p = count / totalWeight; precision_t p = count / totalWeight;
if (p > 0) if (p > 0) {
if (nat) if (nat) {
ventropy -= p * log(p); ventropy -= p * log(p);
else } else {
ventropy -= p * log2(p); ventropy -= p * log2(p);
}
}
} }
return ventropy; return ventropy;
} }
@@ -100,10 +86,11 @@ namespace features {
for (auto& [label, jointCount] : jointCounts[feat]) { for (auto& [label, jointCount] : jointCounts[feat]) {
auto p_l_f = jointCount / count; auto p_l_f = jointCount / count;
if (p_l_f > 0) { if (p_l_f > 0) {
if (nat) if (nat) {
entropy_f -= p_l_f * log(p_l_f); entropy_f -= p_l_f * log(p_l_f);
else } else {
entropy_f -= p_l_f * log2(p_l_f); entropy_f -= p_l_f * log2(p_l_f);
}
} }
} }
entropy += p_f * entropy_f; entropy += p_f * entropy_f;
@@ -115,27 +102,17 @@ namespace features {
{ {
return entropyLabel() - conditionalEntropy(i); return entropyLabel() - conditionalEntropy(i);
} }
score_t SelectKBestWeighted::getScore() const score_t SelectKBestWeighted::getScores() const
{ {
if (!fitted) if (!fitted)
throw logic_error("score not fitted"); throw logic_error("score not fitted");
return score; return scores;
}
//Return the indices of the selected features
labels_t SelectKBestWeighted::getFeatures() const
{
if (!fitted)
throw logic_error("score not fitted");
return features;
} }
} }
// using namespace std;
// int main()
// {
// vector<vector<int>> samples = { {1, 2, 3}, {4, 5, 6}, {7, 8, 9} };
// vector<int> labels = { 1, 2, 1 };
// vector<float> weights = { 0.1, 0.7, 0.2 };
// int k = 3;
// auto metric = features::SelectKBestWeighted(samples, labels, weights, k);
// metric.fit();
// cout << "score: ";
// for (auto item : metric.getScore())
// cout << item << ", ";
// cout << "end." << endl;
// return 0;
// }
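
For reference, a minimal Python sketch of the selection logic the new fit() implements: score every feature by weighted mutual information with the label and keep the indices of the k highest-scoring ones. This is an illustration only; the helper names weighted_entropy, weighted_conditional_entropy and select_k_best are made up here and are not part of the commit.

import math
from collections import defaultdict

def weighted_entropy(values, weights, nat=False):
    # H(V) = -sum_v p(v) * log p(v), with p(v) estimated from the sample weights
    counts = defaultdict(float)
    total = 0.0
    for v, w in zip(values, weights):
        counts[v] += w
        total += w
    log_fn = math.log if nat else math.log2
    return -sum((c / total) * log_fn(c / total) for c in counts.values() if c > 0)

def weighted_conditional_entropy(feature, labels, weights, nat=False):
    # H(Y | X) = sum_x p(x) * H(Y | X = x), grouping labels and weights by feature value
    groups = defaultdict(lambda: ([], []))
    for x, y, w in zip(feature, labels, weights):
        groups[x][0].append(y)
        groups[x][1].append(w)
    total = sum(weights)
    return sum((sum(ws) / total) * weighted_entropy(ys, ws, nat)
               for ys, ws in groups.values())

def select_k_best(samples, labels, weights, k, nat=False):
    # score(i) = MI(X_i; Y) = H(Y) - H(Y | X_i); keep the k best, like the new fit()
    h_y = weighted_entropy(labels, weights, nat)
    n_features = len(samples[0])
    scores = [h_y - weighted_conditional_entropy([row[i] for row in samples],
                                                 labels, weights, nat)
              for i in range(n_features)]
    features = sorted(range(n_features), key=lambda i: scores[i], reverse=True)[:k]
    return features, sorted(scores, reverse=True)[:k]

As in the C++ code, the feature indices are sorted by descending score and both lists are truncated to k.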

View File

@@ -21,16 +21,17 @@ namespace features {
         bool nat; // use natural log or log2
         int numFeatures, numClasses, numSamples;
         bool fitted;
-        score_t score;
+        score_t scores; // scores of the features
+        labels_t features; // indices of the selected features
         precision_t entropyLabel();
         precision_t entropy(const sample_t&);
         precision_t conditionalEntropy(const int);
         precision_t MutualInformation(const int);
-        void outputValues();
     public:
         SelectKBestWeighted(samples_t&, labels_t&, weights_t&, int, bool);
         void fit();
-        score_t getScore() const;
+        score_t getScores() const;
+        labels_t getFeatures() const; //Return the indices of the selected features
         static inline string version() { return "0.1.0"; };
     };
 }

View File

@@ -1 +0,0 @@
#error Do not use this file, it is the result of a failed Cython compilation.

View File

@@ -983,8 +983,8 @@ static const char *__pyx_f[] = {
/*--- Type declarations ---*/ /*--- Type declarations ---*/
struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted; struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted;
/* "bayesclass/cSelectFeatures.pyx":16 /* "bayesclass/cSelectFeatures.pyx":17
* vector[precision_t] getScore() * vector[int] getFeatures()
* *
* cdef class CSelectKBestWeighted: # <<<<<<<<<<<<<< * cdef class CSelectKBestWeighted: # <<<<<<<<<<<<<<
* cdef SelectKBestWeighted *thisptr * cdef SelectKBestWeighted *thisptr
@@ -1251,6 +1251,9 @@ static void __Pyx_CppExn2PyErr() {
/* CIntFromPy.proto */ /* CIntFromPy.proto */
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
/* CIntToPy.proto */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
/* CIntFromPy.proto */ /* CIntFromPy.proto */
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *);
@@ -1294,6 +1297,7 @@ static std::vector<int> __pyx_convert_vector_from_py_int(PyObject *); /*proto*/
static std::vector<std::vector<int> > __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(PyObject *); /*proto*/ static std::vector<std::vector<int> > __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(PyObject *); /*proto*/
static std::vector<features::precision_t> __pyx_convert_vector_from_py_features_3a__3a_precision_t(PyObject *); /*proto*/ static std::vector<features::precision_t> __pyx_convert_vector_from_py_features_3a__3a_precision_t(PyObject *); /*proto*/
static PyObject *__pyx_convert_vector_to_py_features_3a__3a_precision_t(const std::vector<features::precision_t> &); /*proto*/ static PyObject *__pyx_convert_vector_to_py_features_3a__3a_precision_t(const std::vector<features::precision_t> &); /*proto*/
static PyObject *__pyx_convert_vector_to_py_int(const std::vector<int> &); /*proto*/
static CYTHON_INLINE PyObject *__pyx_convert_PyObject_string_to_py_std__in_string(std::string const &); /*proto*/ static CYTHON_INLINE PyObject *__pyx_convert_PyObject_string_to_py_std__in_string(std::string const &); /*proto*/
static CYTHON_INLINE PyObject *__pyx_convert_PyUnicode_string_to_py_std__in_string(std::string const &); /*proto*/ static CYTHON_INLINE PyObject *__pyx_convert_PyUnicode_string_to_py_std__in_string(std::string const &); /*proto*/
static CYTHON_INLINE PyObject *__pyx_convert_PyStr_string_to_py_std__in_string(std::string const &); /*proto*/ static CYTHON_INLINE PyObject *__pyx_convert_PyStr_string_to_py_std__in_string(std::string const &); /*proto*/
@@ -1330,13 +1334,14 @@ static PyObject *__pyx_n_s_y;
static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___cinit__(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self, PyObject *__pyx_v_X, PyObject *__pyx_v_y, PyObject *__pyx_v_weights, PyObject *__pyx_v_k, PyObject *__pyx_v_natural); /* proto */ static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___cinit__(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self, PyObject *__pyx_v_X, PyObject *__pyx_v_y, PyObject *__pyx_v_weights, PyObject *__pyx_v_k, PyObject *__pyx_v_natural); /* proto */
static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__dealloc__(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__dealloc__(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_4fit(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_4fit(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_score(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_scores(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_features(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_12__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self); /* proto */
static PyObject *__pyx_tp_new_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ static PyObject *__pyx_tp_new_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
/* Late includes */ /* Late includes */
/* "bayesclass/cSelectFeatures.pyx":18 /* "bayesclass/cSelectFeatures.pyx":19
* cdef class CSelectKBestWeighted: * cdef class CSelectKBestWeighted:
* cdef SelectKBestWeighted *thisptr * cdef SelectKBestWeighted *thisptr
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 # <<<<<<<<<<<<<< * def __cinit__(self, X, y, weights, k, natural=False): # log or log2 # <<<<<<<<<<<<<<
@@ -1388,19 +1393,19 @@ static int __pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_1__c
case 1: case 1:
if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_y)) != 0)) kw_args--; if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_y)) != 0)) kw_args--;
else { else {
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 1); __PYX_ERR(0, 18, __pyx_L3_error) __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 1); __PYX_ERR(0, 19, __pyx_L3_error)
} }
CYTHON_FALLTHROUGH; CYTHON_FALLTHROUGH;
case 2: case 2:
if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--; if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--;
else { else {
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 2); __PYX_ERR(0, 18, __pyx_L3_error) __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 2); __PYX_ERR(0, 19, __pyx_L3_error)
} }
CYTHON_FALLTHROUGH; CYTHON_FALLTHROUGH;
case 3: case 3:
if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_k)) != 0)) kw_args--; if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_k)) != 0)) kw_args--;
else { else {
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 3); __PYX_ERR(0, 18, __pyx_L3_error) __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, 3); __PYX_ERR(0, 19, __pyx_L3_error)
} }
CYTHON_FALLTHROUGH; CYTHON_FALLTHROUGH;
case 4: case 4:
@@ -1410,7 +1415,7 @@ static int __pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_1__c
} }
} }
if (unlikely(kw_args > 0)) { if (unlikely(kw_args > 0)) {
if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(0, 18, __pyx_L3_error) if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(0, 19, __pyx_L3_error)
} }
} else { } else {
switch (PyTuple_GET_SIZE(__pyx_args)) { switch (PyTuple_GET_SIZE(__pyx_args)) {
@@ -1432,7 +1437,7 @@ static int __pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_1__c
} }
goto __pyx_L4_argument_unpacking_done; goto __pyx_L4_argument_unpacking_done;
__pyx_L5_argtuple_error:; __pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 18, __pyx_L3_error) __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 19, __pyx_L3_error)
__pyx_L3_error:; __pyx_L3_error:;
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
@@ -1459,27 +1464,27 @@ static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___ci
int __pyx_clineno = 0; int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__cinit__", 0); __Pyx_RefNannySetupContext("__cinit__", 0);
/* "bayesclass/cSelectFeatures.pyx":19 /* "bayesclass/cSelectFeatures.pyx":20
* cdef SelectKBestWeighted *thisptr * cdef SelectKBestWeighted *thisptr
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 * def __cinit__(self, X, y, weights, k, natural=False): # log or log2
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural) # <<<<<<<<<<<<<< * self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural) # <<<<<<<<<<<<<<
* def __dealloc__(self): * def __dealloc__(self):
* del self.thisptr * del self.thisptr
*/ */
__pyx_t_1 = __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(__pyx_v_X); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error) __pyx_t_1 = __pyx_convert_vector_from_py_std_3a__3a_vector_3c_int_3e___(__pyx_v_X); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_2 = __pyx_convert_vector_from_py_int(__pyx_v_y); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error) __pyx_t_2 = __pyx_convert_vector_from_py_int(__pyx_v_y); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_3 = __pyx_convert_vector_from_py_features_3a__3a_precision_t(__pyx_v_weights); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error) __pyx_t_3 = __pyx_convert_vector_from_py_features_3a__3a_precision_t(__pyx_v_weights); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_4 = __Pyx_PyInt_As_int(__pyx_v_k); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error) __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_v_k); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
__pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_natural); if (unlikely((__pyx_t_5 == ((bool)-1)) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L1_error) __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_natural); if (unlikely((__pyx_t_5 == ((bool)-1)) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
try { try {
__pyx_t_6 = new features::SelectKBestWeighted(__pyx_t_1, __pyx_t_2, __pyx_t_3, __pyx_t_4, __pyx_t_5); __pyx_t_6 = new features::SelectKBestWeighted(__pyx_t_1, __pyx_t_2, __pyx_t_3, __pyx_t_4, __pyx_t_5);
} catch(...) { } catch(...) {
__Pyx_CppExn2PyErr(); __Pyx_CppExn2PyErr();
__PYX_ERR(0, 19, __pyx_L1_error) __PYX_ERR(0, 20, __pyx_L1_error)
} }
__pyx_v_self->thisptr = __pyx_t_6; __pyx_v_self->thisptr = __pyx_t_6;
/* "bayesclass/cSelectFeatures.pyx":18 /* "bayesclass/cSelectFeatures.pyx":19
* cdef class CSelectKBestWeighted: * cdef class CSelectKBestWeighted:
* cdef SelectKBestWeighted *thisptr * cdef SelectKBestWeighted *thisptr
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 # <<<<<<<<<<<<<< * def __cinit__(self, X, y, weights, k, natural=False): # log or log2 # <<<<<<<<<<<<<<
@@ -1498,7 +1503,7 @@ static int __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted___ci
return __pyx_r; return __pyx_r;
} }
/* "bayesclass/cSelectFeatures.pyx":20 /* "bayesclass/cSelectFeatures.pyx":21
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 * def __cinit__(self, X, y, weights, k, natural=False): # log or log2
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural) * self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
* def __dealloc__(self): # <<<<<<<<<<<<<< * def __dealloc__(self): # <<<<<<<<<<<<<<
@@ -1521,7 +1526,7 @@ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__dealloc__", 0); __Pyx_RefNannySetupContext("__dealloc__", 0);
/* "bayesclass/cSelectFeatures.pyx":21 /* "bayesclass/cSelectFeatures.pyx":22
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural) * self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
* def __dealloc__(self): * def __dealloc__(self):
* del self.thisptr # <<<<<<<<<<<<<< * del self.thisptr # <<<<<<<<<<<<<<
@@ -1530,7 +1535,7 @@ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__
*/ */
delete __pyx_v_self->thisptr; delete __pyx_v_self->thisptr;
/* "bayesclass/cSelectFeatures.pyx":20 /* "bayesclass/cSelectFeatures.pyx":21
* def __cinit__(self, X, y, weights, k, natural=False): # log or log2 * def __cinit__(self, X, y, weights, k, natural=False): # log or log2
* self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural) * self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
* def __dealloc__(self): # <<<<<<<<<<<<<< * def __dealloc__(self): # <<<<<<<<<<<<<<
@@ -1542,7 +1547,7 @@ static void __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_2__
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
} }
/* "bayesclass/cSelectFeatures.pyx":22 /* "bayesclass/cSelectFeatures.pyx":23
* def __dealloc__(self): * def __dealloc__(self):
* del self.thisptr * del self.thisptr
* def fit(self,): # <<<<<<<<<<<<<< * def fit(self,): # <<<<<<<<<<<<<<
@@ -1568,28 +1573,28 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("fit", 0); __Pyx_RefNannySetupContext("fit", 0);
/* "bayesclass/cSelectFeatures.pyx":23 /* "bayesclass/cSelectFeatures.pyx":24
* del self.thisptr * del self.thisptr
* def fit(self,): * def fit(self,):
* self.thisptr.fit() # <<<<<<<<<<<<<< * self.thisptr.fit() # <<<<<<<<<<<<<<
* return self * return self
* def get_score(self): * def get_scores(self):
*/ */
__pyx_v_self->thisptr->fit(); __pyx_v_self->thisptr->fit();
/* "bayesclass/cSelectFeatures.pyx":24 /* "bayesclass/cSelectFeatures.pyx":25
* def fit(self,): * def fit(self,):
* self.thisptr.fit() * self.thisptr.fit()
* return self # <<<<<<<<<<<<<< * return self # <<<<<<<<<<<<<<
* def get_score(self): * def get_scores(self):
* return self.thisptr.getScore() * return self.thisptr.getScores()
*/ */
__Pyx_XDECREF(__pyx_r); __Pyx_XDECREF(__pyx_r);
__Pyx_INCREF(((PyObject *)__pyx_v_self)); __Pyx_INCREF(((PyObject *)__pyx_v_self));
__pyx_r = ((PyObject *)__pyx_v_self); __pyx_r = ((PyObject *)__pyx_v_self);
goto __pyx_L0; goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":22 /* "bayesclass/cSelectFeatures.pyx":23
* def __dealloc__(self): * def __dealloc__(self):
* del self.thisptr * del self.thisptr
* def fit(self,): # <<<<<<<<<<<<<< * def fit(self,): # <<<<<<<<<<<<<<
@@ -1604,62 +1609,62 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
return __pyx_r; return __pyx_r;
} }
/* "bayesclass/cSelectFeatures.pyx":25 /* "bayesclass/cSelectFeatures.pyx":26
* self.thisptr.fit() * self.thisptr.fit()
* return self * return self
* def get_score(self): # <<<<<<<<<<<<<< * def get_scores(self): # <<<<<<<<<<<<<<
* return self.thisptr.getScore() * return self.thisptr.getScores()
* def get_version(self): * def get_features(self):
*/ */
/* Python wrapper */ /* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_score(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_scores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_score(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_scores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0; PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("get_score (wrapper)", 0); __Pyx_RefNannySetupContext("get_scores (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_score(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self)); __pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_scores(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */ /* function exit code */
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return __pyx_r; return __pyx_r;
} }
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_score(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) { static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_6get_scores(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL; PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0; int __pyx_lineno = 0;
const char *__pyx_filename = NULL; const char *__pyx_filename = NULL;
int __pyx_clineno = 0; int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("get_score", 0); __Pyx_RefNannySetupContext("get_scores", 0);
/* "bayesclass/cSelectFeatures.pyx":26 /* "bayesclass/cSelectFeatures.pyx":27
* return self * return self
* def get_score(self): * def get_scores(self):
* return self.thisptr.getScore() # <<<<<<<<<<<<<< * return self.thisptr.getScores() # <<<<<<<<<<<<<<
* def get_version(self): * def get_features(self):
* return self.thisptr.version() * return self.thisptr.getFeatures()
*/ */
__Pyx_XDECREF(__pyx_r); __Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_convert_vector_to_py_features_3a__3a_precision_t(__pyx_v_self->thisptr->getScore()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error) __pyx_t_1 = __pyx_convert_vector_to_py_features_3a__3a_precision_t(__pyx_v_self->thisptr->getScores()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 27, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1; __pyx_r = __pyx_t_1;
__pyx_t_1 = 0; __pyx_t_1 = 0;
goto __pyx_L0; goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":25 /* "bayesclass/cSelectFeatures.pyx":26
* self.thisptr.fit() * self.thisptr.fit()
* return self * return self
* def get_score(self): # <<<<<<<<<<<<<< * def get_scores(self): # <<<<<<<<<<<<<<
* return self.thisptr.getScore() * return self.thisptr.getScores()
* def get_version(self): * def get_features(self):
*/ */
/* function exit code */ /* function exit code */
__pyx_L1_error:; __pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.get_score", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.get_scores", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL; __pyx_r = NULL;
__pyx_L0:; __pyx_L0:;
__Pyx_XGIVEREF(__pyx_r); __Pyx_XGIVEREF(__pyx_r);
@@ -1667,28 +1672,91 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
return __pyx_r; return __pyx_r;
} }
/* "bayesclass/cSelectFeatures.pyx":27 /* "bayesclass/cSelectFeatures.pyx":28
* def get_score(self): * def get_scores(self):
* return self.thisptr.getScore() * return self.thisptr.getScores()
* def get_version(self): # <<<<<<<<<<<<<< * def get_features(self): # <<<<<<<<<<<<<<
* return self.thisptr.version() * return self.thisptr.getFeatures()
* def __reduce__(self): * def get_version(self):
*/ */
/* Python wrapper */ /* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_features(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_features(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0; PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("get_version (wrapper)", 0); __Pyx_RefNannySetupContext("get_features (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_version(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self)); __pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_features(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */ /* function exit code */
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return __pyx_r; return __pyx_r;
} }
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) { static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_8get_features(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("get_features", 0);
/* "bayesclass/cSelectFeatures.pyx":29
* return self.thisptr.getScores()
* def get_features(self):
* return self.thisptr.getFeatures() # <<<<<<<<<<<<<<
* def get_version(self):
* return self.thisptr.version()
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_convert_vector_to_py_int(__pyx_v_self->thisptr->getFeatures()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":28
* def get_scores(self):
* return self.thisptr.getScores()
* def get_features(self): # <<<<<<<<<<<<<<
* return self.thisptr.getFeatures()
* def get_version(self):
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("bayesclass.cppSelectFeatures.CSelectKBestWeighted.get_features", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "bayesclass/cSelectFeatures.pyx":30
* def get_features(self):
* return self.thisptr.getFeatures()
* def get_version(self): # <<<<<<<<<<<<<<
* return self.thisptr.version()
* def __reduce__(self):
*/
/* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11get_version(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("get_version (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10get_version(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10get_version(struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL; PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_1 = NULL;
@@ -1697,23 +1765,23 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
int __pyx_clineno = 0; int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("get_version", 0); __Pyx_RefNannySetupContext("get_version", 0);
/* "bayesclass/cSelectFeatures.pyx":28 /* "bayesclass/cSelectFeatures.pyx":31
* return self.thisptr.getScore() * return self.thisptr.getFeatures()
* def get_version(self): * def get_version(self):
* return self.thisptr.version() # <<<<<<<<<<<<<< * return self.thisptr.version() # <<<<<<<<<<<<<<
* def __reduce__(self): * def __reduce__(self):
* return (CSelectKBestWeighted, ()) * return (CSelectKBestWeighted, ())
*/ */
__Pyx_XDECREF(__pyx_r); __Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_convert_PyBytes_string_to_py_std__in_string(__pyx_v_self->thisptr->version()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) __pyx_t_1 = __pyx_convert_PyBytes_string_to_py_std__in_string(__pyx_v_self->thisptr->version()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1; __pyx_r = __pyx_t_1;
__pyx_t_1 = 0; __pyx_t_1 = 0;
goto __pyx_L0; goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":27 /* "bayesclass/cSelectFeatures.pyx":30
* def get_score(self): * def get_features(self):
* return self.thisptr.getScore() * return self.thisptr.getFeatures()
* def get_version(self): # <<<<<<<<<<<<<< * def get_version(self): # <<<<<<<<<<<<<<
* return self.thisptr.version() * return self.thisptr.version()
* def __reduce__(self): * def __reduce__(self):
@@ -1730,7 +1798,7 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
return __pyx_r; return __pyx_r;
} }
/* "bayesclass/cSelectFeatures.pyx":29 /* "bayesclass/cSelectFeatures.pyx":32
* def get_version(self): * def get_version(self):
* return self.thisptr.version() * return self.thisptr.version()
* def __reduce__(self): # <<<<<<<<<<<<<< * def __reduce__(self): # <<<<<<<<<<<<<<
@@ -1738,19 +1806,19 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
*/ */
/* Python wrapper */ /* Python wrapper */
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_13__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { static PyObject *__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_13__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
PyObject *__pyx_r = 0; PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0); __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0);
__pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10__reduce__(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self)); __pyx_r = __pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_12__reduce__(((struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *)__pyx_v_self));
/* function exit code */ /* function exit code */
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return __pyx_r; return __pyx_r;
} }
static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_10__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) { static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_12__reduce__(CYTHON_UNUSED struct __pyx_obj_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted *__pyx_v_self) {
PyObject *__pyx_r = NULL; PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_1 = NULL;
@@ -1759,13 +1827,13 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
int __pyx_clineno = 0; int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__reduce__", 0); __Pyx_RefNannySetupContext("__reduce__", 0);
/* "bayesclass/cSelectFeatures.pyx":30 /* "bayesclass/cSelectFeatures.pyx":33
* return self.thisptr.version() * return self.thisptr.version()
* def __reduce__(self): * def __reduce__(self):
* return (CSelectKBestWeighted, ()) # <<<<<<<<<<<<<< * return (CSelectKBestWeighted, ()) # <<<<<<<<<<<<<<
*/ */
__Pyx_XDECREF(__pyx_r); __Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error) __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_1);
__Pyx_INCREF(((PyObject *)__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted)); __Pyx_INCREF(((PyObject *)__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted));
__Pyx_GIVEREF(((PyObject *)__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted)); __Pyx_GIVEREF(((PyObject *)__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted));
@@ -1777,7 +1845,7 @@ static PyObject *__pyx_pf_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighte
__pyx_t_1 = 0; __pyx_t_1 = 0;
goto __pyx_L0; goto __pyx_L0;
/* "bayesclass/cSelectFeatures.pyx":29 /* "bayesclass/cSelectFeatures.pyx":32
* def get_version(self): * def get_version(self):
* return self.thisptr.version() * return self.thisptr.version()
* def __reduce__(self): # <<<<<<<<<<<<<< * def __reduce__(self): # <<<<<<<<<<<<<<
@@ -2102,7 +2170,7 @@ static std::vector<features::precision_t> __pyx_convert_vector_from_py_features
* return v * return v
* *
*/ */
__pyx_t_5 = __pyx_PyFloat_AsDouble(__pyx_v_item); if (unlikely((__pyx_t_5 == ((features::precision_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L1_error) __pyx_t_5 = __pyx_PyFloat_AsFloat(__pyx_v_item); if (unlikely((__pyx_t_5 == ((features::precision_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L1_error)
__pyx_v_v.push_back(((features::precision_t)__pyx_t_5)); __pyx_v_v.push_back(((features::precision_t)__pyx_t_5));
/* "vector.from_py":47 /* "vector.from_py":47
@@ -2210,6 +2278,63 @@ static PyObject *__pyx_convert_vector_to_py_features_3a__3a_precision_t(const st
return __pyx_r; return __pyx_r;
} }
static PyObject *__pyx_convert_vector_to_py_int(const std::vector<int> &__pyx_v_v) {
size_t __pyx_v_i;
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
size_t __pyx_t_2;
size_t __pyx_t_3;
size_t __pyx_t_4;
PyObject *__pyx_t_5 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__pyx_convert_vector_to_py_int", 0);
/* "vector.to_py":61
* @cname("__pyx_convert_vector_to_py_int")
* cdef object __pyx_convert_vector_to_py_int(vector[X]& v):
* return [v[i] for i in range(v.size())] # <<<<<<<<<<<<<<
*
*
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 61, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __pyx_v_v.size();
__pyx_t_3 = __pyx_t_2;
for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
__pyx_v_i = __pyx_t_4;
__pyx_t_5 = __Pyx_PyInt_From_int((__pyx_v_v[__pyx_v_i])); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 61, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_5);
if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_5))) __PYX_ERR(1, 61, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
}
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "vector.to_py":60
*
* @cname("__pyx_convert_vector_to_py_int")
* cdef object __pyx_convert_vector_to_py_int(vector[X]& v): # <<<<<<<<<<<<<<
* return [v[i] for i in range(v.size())]
*
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_5);
__Pyx_AddTraceback("vector.to_py.__pyx_convert_vector_to_py_int", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "string.to_py":31 /* "string.to_py":31
* *
* @cname("__pyx_convert_PyObject_string_to_py_std__in_string") * @cname("__pyx_convert_PyObject_string_to_py_std__in_string")
@@ -2493,9 +2618,10 @@ static void __pyx_tp_dealloc_10bayesclass_17cppSelectFeatures_CSelectKBestWeight
static PyMethodDef __pyx_methods_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted[] = { static PyMethodDef __pyx_methods_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted[] = {
{"fit", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_5fit, METH_NOARGS, 0}, {"fit", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_5fit, METH_NOARGS, 0},
{"get_score", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_score, METH_NOARGS, 0}, {"get_scores", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_7get_scores, METH_NOARGS, 0},
{"get_version", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_version, METH_NOARGS, 0}, {"get_features", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_9get_features, METH_NOARGS, 0},
{"__reduce__", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11__reduce__, METH_NOARGS, 0}, {"get_version", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_11get_version, METH_NOARGS, 0},
{"__reduce__", (PyCFunction)__pyx_pw_10bayesclass_17cppSelectFeatures_20CSelectKBestWeighted_13__reduce__, METH_NOARGS, 0},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@@ -2690,14 +2816,14 @@ static int __Pyx_modinit_type_init_code(void) {
int __pyx_clineno = 0; int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
/*--- Type init code ---*/ /*--- Type init code ---*/
if (PyType_Ready(&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 16, __pyx_L1_error) if (PyType_Ready(&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 17, __pyx_L1_error)
#if PY_VERSION_HEX < 0x030800B1 #if PY_VERSION_HEX < 0x030800B1
__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_print = 0; __pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_print = 0;
#endif #endif
if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_dictoffset && __pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_getattro == PyObject_GenericGetAttr)) { if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_dictoffset && __pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_getattro == PyObject_GenericGetAttr)) {
__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_getattro = __Pyx_PyObject_GenericGetAttr; __pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted.tp_getattro = __Pyx_PyObject_GenericGetAttr;
} }
if (PyObject_SetAttr(__pyx_m, __pyx_n_s_CSelectKBestWeighted, (PyObject *)&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 16, __pyx_L1_error) if (PyObject_SetAttr(__pyx_m, __pyx_n_s_CSelectKBestWeighted, (PyObject *)&__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted) < 0) __PYX_ERR(0, 17, __pyx_L1_error)
__pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted = &__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted; __pyx_ptype_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted = &__pyx_type_10bayesclass_17cppSelectFeatures_CSelectKBestWeighted;
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return 0; return 0;
@@ -3707,6 +3833,44 @@ raise_neg_overflow:
return (int) -1; return (int) -1;
} }
/* CIntToPy */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
const int neg_one = (int) -1, const_zero = (int) 0;
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
const int is_unsigned = neg_one > const_zero;
if (is_unsigned) {
if (sizeof(int) < sizeof(long)) {
return PyInt_FromLong((long) value);
} else if (sizeof(int) <= sizeof(unsigned long)) {
return PyLong_FromUnsignedLong((unsigned long) value);
#ifdef HAVE_LONG_LONG
} else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
#endif
}
} else {
if (sizeof(int) <= sizeof(long)) {
return PyInt_FromLong((long) value);
#ifdef HAVE_LONG_LONG
} else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
return PyLong_FromLongLong((PY_LONG_LONG) value);
#endif
}
}
{
int one = 1; int little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&value;
return _PyLong_FromByteArray(bytes, sizeof(int),
little, !is_unsigned);
}
}
/* CIntFromPy */ /* CIntFromPy */
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC #ifdef __Pyx_HAS_GCC_DIAGNOSTIC

View File

@@ -6,12 +6,13 @@ from libcpp cimport bool
 cdef extern from "FeatureSelect.h" namespace "features":
-    ctypedef double precision_t
+    ctypedef float precision_t
     cdef cppclass SelectKBestWeighted:
         SelectKBestWeighted(vector[vector[int]]&, vector[int]&, vector[precision_t]&, int, bool) except +
         void fit()
         string version()
-        vector[precision_t] getScore()
+        vector[precision_t] getScores()
+        vector[int] getFeatures()
 cdef class CSelectKBestWeighted:
     cdef SelectKBestWeighted *thisptr
@@ -22,8 +23,10 @@ cdef class CSelectKBestWeighted:
     def fit(self,):
         self.thisptr.fit()
         return self
-    def get_score(self):
-        return self.thisptr.getScore()
+    def get_scores(self):
+        return self.thisptr.getScores()
+    def get_features(self):
+        return self.thisptr.getFeatures()
     def get_version(self):
         return self.thisptr.version()
     def __reduce__(self):
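
A short usage sketch of the wrapper as exposed after this change, using the same toy data as the removed test.py further below, updated to the renamed methods (illustrative only):

from bayesclass.cppSelectFeatures import CSelectKBestWeighted

X = [[x for x in range(i, i + 3)] for i in range(1, 30, 3)]  # 10 samples, 3 features
labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
weights = [25 / (i + 1) for i in range(10)]

selector = CSelectKBestWeighted(X, labels, weights, k=2).fit()
print(selector.get_scores())    # weighted mutual information of the selected features
print(selector.get_features())  # indices of the selected features
print(selector.get_version())   # version string of the underlying C++ code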

View File

@@ -1,24 +0,0 @@
double conditionalEntropy(std::vector<int>& classVec, std::vector<int>& featureVec, std::vector<double>& weightsVec)
{
std::map<int, std::vector<int>> classesPerFeatureValue;
std::map<int, std::vector<double>> weightsPerFeatureValue;
for (int i = 0; i < featureVec.size(); i++) {
classesPerFeatureValue[featureVec[i]].push_back(classVec[i]);
weightsPerFeatureValue[featureVec[i]].push_back(weightsVec[i]);
}
double totalEntropy = 0;
double totalWeight = 0;
for (auto& pair : classesPerFeatureValue) {
double featureValueEntropy = calculateEntropy(pair.second, weightsPerFeatureValue[pair.first]);
double featureValueWeight = 0;
for (double weight : weightsPerFeatureValue[pair.first]) {
featureValueWeight += weight;
}
totalEntropy += featureValueWeight * featureValueEntropy;
totalWeight += featureValueWeight;
}
return totalEntropy / totalWeight;
}

View File

@@ -15,7 +15,7 @@ from pgmpy.models import BayesianNetwork
 from pgmpy.base import DAG
 import matplotlib.pyplot as plt
 from fimdlp.mdlp import FImdlp
-from .feature_selection import SelectKBestWeighted
+from .cppSelectFeatures import CSelectKBestWeighted
 from ._version import __version__
@@ -869,15 +869,39 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         self.nodes_leaves = self.nodes_edges
         return self
-    def mutual_info_classif_weighted(X, y, sample_weight):
-        # Compute the mutual information between each feature and the target
-        mi = mutual_info_classif(X, y)
-        # Multiply the mutual information scores with the sample weights
-        mi_weighted = mi * sample_weight
-        # Return the weighted mutual information scores
-        return mi_weighted
+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.version()
+        return SPODE(None, False).version()
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+    @property
+    def depth_(self):
+        return self.states_
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.dag_) for x in self.estimators_])
+            edges = sum([len(x.dag_.edges()) for x in self.estimators_])
+        return nodes, edges
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.plot(title=f"{idx} {title}")
     def _train(self, kwargs):
         """Build boosted SPODEs"""
@@ -885,14 +909,12 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         # Step 0: Set the finish condition
         for num in range(self.n_estimators):
             # Step 1: Build ranking with mutual information
-            # WRONG: this does not update the ranking with the weights
-            # it always selects the same feature
-            feature = (
-                SelectKBestWeighted(k=1)
-                .fit(self.X_, self.y_, weights)
-                .get_feature_names_out(self.feature_names_in_)
-                .tolist()
+            n_feature = (
+                CSelectKBestWeighted(self.X_, self.y_, weights, k=1)
+                .fit()
+                .get_features()[0]
             )
+            feature = self.feature_names_in_[n_feature]
             # Step 2: Build & train spode with the first feature as sparent
             estimator = clone(self.estimator_)
             _args = kwargs.copy()
@@ -910,8 +932,8 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
             am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
             # Step 3.2: Update weights for next classifier
             weights = [
-                wm * np.exp(am * (ym != y_pred))
-                for wm, ym in zip(weights, self.y_)
+                wm * np.exp(am * (ym != yp))
+                for wm, ym, yp in zip(weights, self.y_, y_pred)
             ]
             # Step 4: Add the new model
             self.estimators_.append(estimator)
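
The weight update above is the multi-class AdaBoost (SAMME) rule. A standalone numpy sketch of the same step (the function name and the final normalisation are additions here, not taken from the commit):

import numpy as np

def update_sample_weights(weights, y_true, y_pred, n_classes):
    weights = np.asarray(weights, dtype=float)
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # em: weighted error of the current estimator
    em = np.sum(weights * (y_true != y_pred)) / np.sum(weights)
    # am = log((1 - em) / em) + log(K - 1), as in the hunk above
    am = np.log((1.0 - em) / em) + np.log(n_classes - 1)
    # misclassified samples get their weight multiplied by exp(am)
    new_weights = weights * np.exp(am * (y_true != y_pred))
    return new_weights / new_weights.sum(), am

For example, update_sample_weights([0.25] * 4, [0, 1, 1, 0], [0, 1, 0, 0], n_classes=2) boosts the weight of the single misclassified sample.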

View File

@@ -1,48 +0,0 @@
#include <unordered_map>
#include <vector>
#include <cmath>
using namespace std;
using value_t = int;
using precision_t = double;
using sample_t = vector<value_t>;
using score_t = vector<precision_t>;
precision_t entropy(const sample_t& data, const vector<precision_t>& weights)
{
precision_t p;
precision_t ventropy = 0, totalWeight = 0;
score_t counts(*max_element(data.begin(), data.end()) + 1, 0);
for (auto i = 0; i < data.size(); ++i) {
counts[data[i]] += weights[i];
totalWeight += weights[i];
}
for (auto count : counts) {
if (count > 0 || count < 0) {
p = (count) / totalWeight;
ventropy -= p * log2(p);
}
}
return ventropy;
}
precision_t conditionalEntropy(const sample_t& feature, const sample_t& labels, const vector<precision_t>& weights)
{
unordered_map<value_t, precision_t> featureCounts;
unordered_map<value_t, unordered_map<value_t, precision_t>> jointCounts;
featureCounts.clear();
jointCounts.clear();
auto totalWeight = 0.0;
for (auto i = 0; i < feature.size(); i++) {
featureCounts[feature[i]] += weights[i];
jointCounts[feature[i]][labels[i]] += weights[i];
totalWeight += weights[i];
}
precision_t entropy = 0;
for (auto& [f, count] : featureCounts) {
auto p_f = count / totalWeight;
entropy += p_f * ::entropy(jointCounts[f], weights) / ::entropy(feature, weights);
}
return entropy;
}

View File

@@ -1,93 +1,93 @@
import numpy as np # import numpy as np
from sklearn.feature_selection import mutual_info_classif # from sklearn.feature_selection import mutual_info_classif
from sklearn.utils.validation import check_X_y, check_is_fitted # from sklearn.utils.validation import check_X_y, check_is_fitted
from sklearn.feature_selection._univariate_selection import ( # from sklearn.feature_selection._univariate_selection import (
_BaseFilter, # _BaseFilter,
_clean_nans, # _clean_nans,
) # )
""" # """
Compute the weighted mutual information between each feature and the # Compute the weighted mutual information between each feature and the
target. # target.
Based on # Based on
Silviu Guiaşu, # Silviu Guiaşu,
Weighted entropy, # Weighted entropy,
Reports on Mathematical Physics, # Reports on Mathematical Physics,
Volume 2, Issue 3, # Volume 2, Issue 3,
1971, # 1971,
Pages 165-179, # Pages 165-179,
ISSN 0034-4877, # ISSN 0034-4877,
https://doi.org/10.1016/0034-4877(71)90002-4. # https://doi.org/10.1016/0034-4877(71)90002-4.
(https://www.sciencedirect.com/science/article/pii/0034487771900024) # (https://www.sciencedirect.com/science/article/pii/0034487771900024)
Abstract: Weighted entropy is the measure of information supplied by a # Abstract: Weighted entropy is the measure of information supplied by a
probablistic experiment whose elementary events are characterized both by their # probablistic experiment whose elementary events are characterized both by their
objective probabilities and by some qualitative (objective or subjective) # objective probabilities and by some qualitative (objective or subjective)
weights. The properties, the axiomatics and the maximum value of the weighted # weights. The properties, the axiomatics and the maximum value of the weighted
entropy are given. # entropy are given.
""" # """
class SelectKBestWeighted(_BaseFilter): # class SelectKBestWeighted(_BaseFilter):
def __init__(self, *, k=10): # def __init__(self, *, k=10):
super().__init__(score_func=mutual_info_classif) # super().__init__(score_func=mutual_info_classif)
self.k = k # self.k = k
def _check_params(self, X, y): # def _check_params(self, X, y):
if self.k > X.shape[1] or self.k < 1: # if self.k > X.shape[1] or self.k < 1:
raise ValueError( # raise ValueError(
f"k must be between 1 and {X.shape[1]} got {self.k}." # f"k must be between 1 and {X.shape[1]} got {self.k}."
) # )
def _get_support_mask(self): # def _get_support_mask(self):
check_is_fitted(self) # check_is_fitted(self)
if self.k == "all": # if self.k == "all":
return np.ones(self.scores_.shape, dtype=bool) # return np.ones(self.scores_.shape, dtype=bool)
elif self.k == 0: # elif self.k == 0:
return np.zeros(self.scores_.shape, dtype=bool) # return np.zeros(self.scores_.shape, dtype=bool)
else: # else:
scores = _clean_nans(self.scores_) # scores = _clean_nans(self.scores_)
mask = np.zeros(scores.shape, dtype=bool) # mask = np.zeros(scores.shape, dtype=bool)
# Request a stable sort. Mergesort takes more memory (~40MB per # # Request a stable sort. Mergesort takes more memory (~40MB per
# megafeature on x86-64). # # megafeature on x86-64).
mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1 # mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
return mask # return mask
def fit(self, X, y, sample_weight): # def fit(self, X, y, sample_weight):
self.X_, self.y_ = check_X_y(X, y) # self.X_, self.y_ = check_X_y(X, y)
self._check_params(X, y) # self._check_params(X, y)
self.n_features_in_ = X.shape[1] # self.n_features_in_ = X.shape[1]
self.sample_weight_ = sample_weight # self.sample_weight_ = sample_weight
# Compute the entropy of the target variable # # Compute the entropy of the target variable
entropy_y = -np.sum( # entropy_y = -np.sum(
np.multiply( # np.multiply(
np.bincount(y, weights=sample_weight), # np.bincount(y, weights=sample_weight),
np.log(np.bincount(y, weights=sample_weight)), # np.log(np.bincount(y, weights=sample_weight)),
) # )
) # )
# Compute the mutual information between each feature and the target # # Compute the mutual information between each feature and the target
mi = self.score_func(X, y) # mi = self.score_func(X, y)
# Compute the weighted entropy of each feature # # Compute the weighted entropy of each feature
entropy_weighted = [] # entropy_weighted = []
for i in range(X.shape[1]): # for i in range(X.shape[1]):
# Compute the weighted frequency of each unique value of the # # Compute the weighted frequency of each unique value of the
# feature # # feature
freq_weighted = np.bincount(X[:, i], weights=sample_weight) # freq_weighted = np.bincount(X[:, i], weights=sample_weight)
freq_weighted = freq_weighted[freq_weighted != 0] # freq_weighted = freq_weighted[freq_weighted != 0]
# Compute the weighted entropy of the feature # # Compute the weighted entropy of the feature
entropy_weighted.append( # entropy_weighted.append(
-np.sum(np.multiply(freq_weighted, np.log(freq_weighted))) # -np.sum(np.multiply(freq_weighted, np.log(freq_weighted)))
/ np.sum(sample_weight) # / np.sum(sample_weight)
) # )
# Compute the weighted mutual information between each feature and # # Compute the weighted mutual information between each feature and
# the target # # the target
mi_weighted = mi * entropy_weighted / entropy_y # mi_weighted = mi * entropy_weighted / entropy_y
# Return the weighted mutual information scores # # Return the weighted mutual information scores
self.scores_ = mi_weighted # self.scores_ = mi_weighted
return self # return self
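
The commented-out class above scores each feature by scaling its plain mutual information with the ratio of the feature's weighted entropy to the label's weighted entropy. A compact numpy restatement of that scoring (illustrative only; it keeps the unnormalised weighted counts as the code above does, assumes discrete non-negative integer features, and drops zero counts to avoid log(0)):

import numpy as np
from sklearn.feature_selection import mutual_info_classif

def weighted_mi_scores(X, y, sample_weight):
    X = np.asarray(X)
    y = np.asarray(y)
    sample_weight = np.asarray(sample_weight, dtype=float)
    # plain mutual information between each feature and the target
    mi = mutual_info_classif(X, y)
    # weighted entropy of the target, computed from weighted class counts
    counts_y = np.bincount(y, weights=sample_weight)
    counts_y = counts_y[counts_y != 0]
    entropy_y = -np.sum(counts_y * np.log(counts_y))
    # weighted entropy of each feature
    entropy_w = np.empty(X.shape[1])
    for i in range(X.shape[1]):
        freq = np.bincount(X[:, i], weights=sample_weight)
        freq = freq[freq != 0]
        entropy_w[i] = -np.sum(freq * np.log(freq)) / np.sum(sample_weight)
    # weighted mutual information: mi * H_w(feature) / H_w(target)
    return mi * entropy_w / entropy_y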

test.py
View File

@@ -1,10 +0,0 @@
from bayesclass.cppSelectFeatures import CSelectKBestWeighted
X = [[x for x in range(i, i + 3)] for i in range(1, 30, 3)]
weights = [25 / (i + 1) for i in range(10)]
labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
test = CSelectKBestWeighted(X, labels, weights, 3)
test.fit()
for item in test.get_score():
print(item)