mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-16 16:06:01 +00:00
Update Readme
Add max_features > n_features test Add make doc
This commit is contained in:
5
Makefile
5
Makefile
@@ -21,6 +21,9 @@ push: ## Push code with tags
|
|||||||
test: ## Run tests
|
test: ## Run tests
|
||||||
python -m unittest -v stree.tests
|
python -m unittest -v stree.tests
|
||||||
|
|
||||||
|
doc: ## Update documentation
|
||||||
|
make -C docs --makefile=Makefile html
|
||||||
|
|
||||||
help: ## Show help message
|
help: ## Show help message
|
||||||
@IFS=$$'\n' ; \
|
@IFS=$$'\n' ; \
|
||||||
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||||
@@ -36,4 +39,4 @@ help: ## Show help message
|
|||||||
printf "%-20s %s" $$help_command ; \
|
printf "%-20s %s" $$help_command ; \
|
||||||
printf '\033[0m'; \
|
printf '\033[0m'; \
|
||||||
printf "%s\n" $$help_info; \
|
printf "%s\n" $$help_info; \
|
||||||
done
|
done
|
@@ -48,7 +48,7 @@ Can be found in
|
|||||||
| | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features). <br>Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
|
| | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features). <br>Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
|
||||||
| | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
|
| | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
|
||||||
| | max_features | \<int\>, \<float\> <br><br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:<br>If int, then consider max_features features at each split.<br>If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.<br>If “auto”, then max_features=sqrt(n_features).<br>If “sqrt”, then max_features=sqrt(n_features).<br>If “log2”, then max_features=log2(n_features).<br>If None, then max_features=n_features. |
|
| | max_features | \<int\>, \<float\> <br><br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:<br>If int, then consider max_features features at each split.<br>If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.<br>If “auto”, then max_features=sqrt(n_features).<br>If “sqrt”, then max_features=sqrt(n_features).<br>If “log2”, then max_features=log2(n_features).<br>If None, then max_features=n_features. |
|
||||||
| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features != num_features). <br>Supported strategies are “best” to choose the best feature set and “random” to choose a random combination. <br>The algorithm generates 5 candidates at most to choose from in both strategies. |
|
| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features < num_features). <br>Supported strategies are “best” to choose the best feature set and “random” to choose a random combination. <br>If “random” is selected, the algorithm generates at most 5 candidates to choose from. If “best” is selected, sklearn's SelectKBest algorithm is used at every node to choose the _max_features_ best features. |
|
||||||
| | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
|
| | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
|
||||||
|
|
||||||
\* Hyperparameter used by the support vector classifier of every node
|
\* Hyperparameter used by the support vector classifier of every node
|
||||||
|
6
setup.py
6
setup.py
@@ -16,7 +16,11 @@ setuptools.setup(
|
|||||||
long_description=readme(),
|
long_description=readme(),
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
packages=setuptools.find_packages(),
|
packages=setuptools.find_packages(),
|
||||||
url=stree.__url__,
|
url="https://github.com/Doctorado-ML/STree#stree",
|
||||||
|
project_urls={
|
||||||
|
"Code": "https://github.com/Doctorado-ML/STree",
|
||||||
|
"Documentation": "https://stree.readthedocs.io/en/latest/index.html",
|
||||||
|
},
|
||||||
author=stree.__author__,
|
author=stree.__author__,
|
||||||
author_email=stree.__author_email__,
|
author_email=stree.__author_email__,
|
||||||
keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
|
keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
|
||||||
|
@@ -653,12 +653,12 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
self.n_features_ = X.shape[1]
|
self.n_features_ = X.shape[1]
|
||||||
self.n_features_in_ = X.shape[1]
|
self.n_features_in_ = X.shape[1]
|
||||||
self.max_features_ = self._initialize_max_features()
|
self.max_features_ = self._initialize_max_features()
|
||||||
self.tree_ = self.train(X, y, sample_weight, 1, "root")
|
self.tree_ = self._train(X, y, sample_weight, 1, "root")
|
||||||
self.X_ = X
|
self.X_ = X
|
||||||
self.y_ = y
|
self.y_ = y
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def train(
|
def _train(
|
||||||
self,
|
self,
|
||||||
X: np.ndarray,
|
X: np.ndarray,
|
||||||
y: np.ndarray,
|
y: np.ndarray,
|
||||||
@@ -723,10 +723,10 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
node.make_predictor()
|
node.make_predictor()
|
||||||
return node
|
return node
|
||||||
node.set_up(
|
node.set_up(
|
||||||
self.train(X_U, y_u, sw_u, depth + 1, title + f" - Up({depth+1})")
|
self._train(X_U, y_u, sw_u, depth + 1, title + f" - Up({depth+1})")
|
||||||
)
|
)
|
||||||
node.set_down(
|
node.set_down(
|
||||||
self.train(
|
self._train(
|
||||||
X_D, y_d, sw_d, depth + 1, title + f" - Down({depth+1})"
|
X_D, y_d, sw_d, depth + 1, title + f" - Down({depth+1})"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -892,6 +892,12 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
elif self.max_features is None:
|
elif self.max_features is None:
|
||||||
max_features = self.n_features_
|
max_features = self.n_features_
|
||||||
elif isinstance(self.max_features, numbers.Integral):
|
elif isinstance(self.max_features, numbers.Integral):
|
||||||
|
if self.max_features > self.n_features_:
|
||||||
|
raise ValueError(
|
||||||
|
"Invalid value for max_features. "
|
||||||
|
"It can not be greater than number of features "
|
||||||
|
f"({self.n_features_})"
|
||||||
|
)
|
||||||
max_features = self.max_features
|
max_features = self.max_features
|
||||||
else: # float
|
else: # float
|
||||||
if self.max_features > 0.0:
|
if self.max_features > 0.0:
|
||||||
|
@@ -6,6 +6,5 @@ __author__ = "Ricardo Montañana Gómez"
|
|||||||
__copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"
|
__copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"
|
||||||
__license__ = "MIT License"
|
__license__ = "MIT License"
|
||||||
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
||||||
__url__ = "https://github.com/doctorado-ml/stree"
|
|
||||||
|
|
||||||
__all__ = ["Stree", "Snode", "Siterator", "Splitter"]
|
__all__ = ["Stree", "Snode", "Siterator", "Splitter"]
|
||||||
|
@@ -269,6 +269,12 @@ class Stree_test(unittest.TestCase):
|
|||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
_ = clf._initialize_max_features()
|
_ = clf._initialize_max_features()
|
||||||
|
|
||||||
|
def test_wrong_max_features(self):
|
||||||
|
X, y = load_dataset(n_features=15)
|
||||||
|
clf = Stree(max_features=16)
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
clf.fit(X, y)
|
||||||
|
|
||||||
def test_get_subspaces(self):
|
def test_get_subspaces(self):
|
||||||
dataset = np.random.random((10, 16))
|
dataset = np.random.random((10, 16))
|
||||||
y = np.random.randint(0, 2, 10)
|
y = np.random.randint(0, 2, 10)
|
||||||
|
Reference in New Issue
Block a user