diff --git a/Makefile b/Makefile
index ca256b9..542d18e 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,9 @@ push: ## Push code with tags
 test: ## Run tests
 	python -m unittest -v stree.tests
 
+doc: ## Update documentation
+	make -C docs --makefile=Makefile html
+
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
@@ -36,4 +39,4 @@ help: ## Show help message
 	printf "%-20s %s" $$help_command ; \
 	printf '\033[0m'; \
 	printf "%s\n" $$help_info; \
-	done
+	done
\ No newline at end of file
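For context, the `help` target (and the new `doc` rule above) relies on the self-documenting Makefile convention: every rule annotated with a trailing `## comment` is grepped out of the file and printed as a two-column index. A rough Python equivalent of that extraction, handy for sanity-checking what `make help` will list (reading `Makefile` from the current directory is an assumption of this sketch):

```python
import re

# Mimic the help target: collect "target: ## description" pairs from the
# Makefile and print them as the same two aligned columns `make help` shows.
with open("Makefile") as fh:
    for line in fh:
        match = re.match(r"^([\w-]+):.*?## (.*)$", line)
        if match:
            target, description = match.groups()
            print(f"{target:<20} {description}")
```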
diff --git a/README.md b/README.md
index 860b901..f8c4139 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ Can be found in
 | | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features).<br>Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
 | | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
 | | max_features | \<int\>, \<float\> <br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:<br>If int, then consider max_features features at each split.<br>If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.<br>If “auto”, then max_features=sqrt(n_features).<br>If “sqrt”, then max_features=sqrt(n_features).<br>If “log2”, then max_features=log2(n_features).<br>If None, then max_features=n_features. |
-| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features != num_features).<br>Supported strategies are “best” to choose the best feature set and “random” to choose a random combination.<br>The algorithm generates 5 candidates at most to choose from in both strategies. |
+| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features < num_features).<br>Supported strategies are “best” to choose the best feature set and “random” to choose a random combination.<br>The algorithm generates 5 candidates at most to choose from if random is selected. If best is selected, the sklearn SelectKBest algorithm is used at every node to choose the _max_features_ best features. |
 | | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
 
 \* Hyperparameter used by the support vector classifier of every node
diff --git a/setup.py b/setup.py
index b56823d..959d6dd 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,11 @@ setuptools.setup(
     long_description=readme(),
     long_description_content_type="text/markdown",
     packages=setuptools.find_packages(),
-    url=stree.__url__,
+    url="https://github.com/Doctorado-ML/STree#stree",
+    project_urls={
+        "Code": "https://github.com/Doctorado-ML/STree",
+        "Documentation": "https://stree.readthedocs.io/en/latest/index.html",
+    },
     author=stree.__author__,
     author_email=stree.__author_email__,
     keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
diff --git a/stree/Strees.py b/stree/Strees.py
index 2f40eb1..3062364 100644
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -653,12 +653,12 @@ class Stree(BaseEstimator, ClassifierMixin):
         self.n_features_ = X.shape[1]
         self.n_features_in_ = X.shape[1]
         self.max_features_ = self._initialize_max_features()
-        self.tree_ = self.train(X, y, sample_weight, 1, "root")
+        self.tree_ = self._train(X, y, sample_weight, 1, "root")
         self.X_ = X
         self.y_ = y
         return self
 
-    def train(
+    def _train(
         self,
         X: np.ndarray,
         y: np.ndarray,
@@ -723,10 +723,10 @@ class Stree(BaseEstimator, ClassifierMixin):
             node.make_predictor()
             return node
         node.set_up(
-            self.train(X_U, y_u, sw_u, depth + 1, title + f" - Up({depth+1})")
+            self._train(X_U, y_u, sw_u, depth + 1, title + f" - Up({depth+1})")
         )
         node.set_down(
-            self.train(
+            self._train(
                 X_D, y_d, sw_d, depth + 1, title + f" - Down({depth+1})"
             )
         )
@@ -892,6 +892,12 @@ class Stree(BaseEstimator, ClassifierMixin):
         elif self.max_features is None:
             max_features = self.n_features_
         elif isinstance(self.max_features, numbers.Integral):
+            if self.max_features > self.n_features_:
+                raise ValueError(
+                    "Invalid value for max_features. "
+                    "It cannot be greater than the number of features "
+                    f"({self.n_features_})"
+                )
             max_features = self.max_features
         else:  # float
             if self.max_features > 0.0:
diff --git a/stree/__init__.py b/stree/__init__.py
index d58a553..eddafae 100644
--- a/stree/__init__.py
+++ b/stree/__init__.py
@@ -6,6 +6,5 @@ __author__ = "Ricardo Montañana Gómez"
 __copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"
 __license__ = "MIT License"
 __author_email__ = "ricardo.montanana@alu.uclm.es"
-__url__ = "https://github.com/doctorado-ml/stree"
 
 __all__ = ["Stree", "Snode", "Siterator", "Splitter"]
diff --git a/stree/tests/Stree_test.py b/stree/tests/Stree_test.py
index de9861c..d05b322 100644
--- a/stree/tests/Stree_test.py
+++ b/stree/tests/Stree_test.py
@@ -269,6 +269,12 @@ class Stree_test(unittest.TestCase):
         with self.assertRaises(ValueError):
             _ = clf._initialize_max_features()
 
+    def test_wrong_max_features(self):
+        X, y = load_dataset(n_features=15)
+        clf = Stree(max_features=16)
+        with self.assertRaises(ValueError):
+            clf.fit(X, y)
+
     def test_get_subspaces(self):
         dataset = np.random.random((10, 16))
         y = np.random.randint(0, 2, 10)
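The README rows above pack the whole `max_features` resolution into one table cell, and the new guard in `_initialize_max_features` rejects oversized integers. A plain-Python restatement of those documented rules, as a sketch of the behaviour rather than the library's actual code:

```python
import math

def resolve_max_features(max_features, n_features: int) -> int:
    """Resolve the max_features hyperparameter to a concrete feature
    count, following the rules in the README table above."""
    if max_features is None:
        return n_features
    if max_features in ("auto", "sqrt"):
        return int(math.sqrt(n_features))
    if max_features == "log2":
        return int(math.log2(n_features))
    if isinstance(max_features, int):
        if max_features > n_features:
            # Mirrors the ValueError added in _initialize_max_features.
            raise ValueError(
                "Invalid value for max_features. It cannot be greater "
                f"than the number of features ({n_features})"
            )
        return max_features
    # A float is a fraction: int(max_features * n_features) features.
    return int(max_features * n_features)

print(resolve_max_features("sqrt", 16))  # 4
print(resolve_max_features(0.5, 16))     # 8
```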
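At the API level, the change means an integer `max_features` larger than the training data's feature count now fails at `fit` time, exactly as `test_wrong_max_features` exercises. A minimal usage sketch, assuming scikit-learn's `make_classification` for the synthetic data (an illustration choice, not part of this diff):

```python
from sklearn.datasets import make_classification
from stree import Stree

# Synthetic binary problem with 15 features.
X, y = make_classification(n_samples=100, n_features=15, random_state=0)

# Documented hyperparameters: "best" runs SelectKBest at every node,
# "sqrt" caps each node's feature subspace at sqrt(n_features).
clf = Stree(splitter="best", max_features="sqrt", normalize=True)
clf.fit(X, y)

# An integer max_features above the 15 available features is now
# rejected at fit time with a ValueError.
try:
    Stree(max_features=16).fit(X, y)
except ValueError as err:
    print(err)
```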