mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 16:22:00 +00:00
Compare commits
25 Commits
Author | SHA1 | Date | |
---|---|---|---|
0f4ea8542e | |||
0fa37dfdf3 | |||
582ff44cb1 | |||
66fe0bbe48 | |||
1d906e2cb3 | |||
|
31a3753046 | ||
3c0f03d27e | |||
d644031f3f | |||
1790f7fee7 | |||
624f79af0d | |||
40031b7d52 | |||
bcc763e656 | |||
b19264b1eb | |||
02e75b3c3e | |||
52d1095161 | |||
f9b83adfee | |||
|
382a420791 | ||
|
7aa4156e51 | ||
|
0df2f243a5 | ||
|
d3ceb3ce46 | ||
|
cabf926eb1 | ||
|
7300bd66db | ||
114f53d5e8 | |||
267a17a708 | |||
e01ca43cf9 |
14
.github/workflows/codeql-analysis.yml
vendored
14
.github/workflows/codeql-analysis.yml
vendored
@ -2,12 +2,12 @@ name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
branches: [master]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ master ]
|
||||
branches: [master]
|
||||
schedule:
|
||||
- cron: '16 17 * * 3'
|
||||
- cron: "16 17 * * 3"
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
@ -17,7 +17,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'python' ]
|
||||
language: ["python"]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
|
||||
# Learn more:
|
||||
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
|
||||
@ -28,7 +28,7 @@ jobs:
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@ -39,7 +39,7 @@ jobs:
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v1
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
@ -53,4 +53,4 @@ jobs:
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v1
|
||||
uses: github/codeql-action/analyze@v2
|
||||
|
8
.github/workflows/main.yml
vendored
8
.github/workflows/main.yml
vendored
@ -13,12 +13,12 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [macos-latest, ubuntu-latest, windows-latest]
|
||||
python: [3.8, 3.9, "3.10"]
|
||||
python: [3.11, 3.12]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Install dependencies
|
||||
@ -35,7 +35,7 @@ jobs:
|
||||
coverage run -m unittest -v odte.tests
|
||||
coverage xml
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v1
|
||||
uses: codecov/codecov-action@v4
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
files: ./coverage.xml
|
||||
|
@ -1,23 +1,23 @@
|
||||
repos:
|
||||
- repo: https://github.com/ambv/black
|
||||
rev: 20.8b1
|
||||
rev: 22.3.0
|
||||
hooks:
|
||||
- id: black
|
||||
exclude: ".virtual_documents"
|
||||
language_version: python3.9
|
||||
- repo: https://gitlab.com/pycqa/flake8
|
||||
rev: 3.8.4
|
||||
rev: 3.9.2
|
||||
hooks:
|
||||
- id: flake8
|
||||
exclude: ".virtual_documents"
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: "v0.790" # Use the sha / tag you want to point at
|
||||
rev: "v0.942" # Use the sha / tag you want to point at
|
||||
hooks:
|
||||
- id: mypy
|
||||
#args: [--strict, --ignore-missing-imports]
|
||||
exclude: odte/tests
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v3.4.0
|
||||
rev: v4.2.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: check-case-conflict
|
||||
|
14
.readthedocs.yaml
Normal file
14
.readthedocs.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
version: 2
|
||||
|
||||
sphinx:
|
||||
configuration: docs/source/conf.py
|
||||
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.12"
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: requirements.txt
|
||||
- requirements: docs/requirements.txt
|
1
MANIFEST.in
Normal file
1
MANIFEST.in
Normal file
@ -0,0 +1 @@
|
||||
include README.md LICENSE
|
43
Makefile
43
Makefile
@ -1,44 +1,35 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: coverage deps help lint push test doc build
|
||||
.PHONY: audit coverage help lint test doc doc-clean build
|
||||
|
||||
coverage: ## Run tests with coverage
|
||||
coverage erase
|
||||
coverage run -m unittest -v odte.tests
|
||||
coverage report -m
|
||||
@coverage erase
|
||||
@coverage run -m unittest -v odte.tests
|
||||
@coverage report -m
|
||||
|
||||
deps: ## Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
devdeps: ## Install development dependencies
|
||||
pip install black pip-audit flake8 mypy coverage
|
||||
|
||||
lint: ## Lint and static-check
|
||||
black odte
|
||||
flake8 odte
|
||||
mypy odte --exclude tests
|
||||
lint: ## Lint source files
|
||||
@black odte
|
||||
@flake8 odte
|
||||
@mypy odte
|
||||
|
||||
audit: ## Audit pip
|
||||
pip-audit
|
||||
|
||||
push: ## Push code with tags
|
||||
git push && git push --tags
|
||||
@pip-audit
|
||||
|
||||
test: ## Run tests
|
||||
python -m unittest -v odte.tests
|
||||
@python -m unittest -v odte.tests
|
||||
|
||||
doc: ## Update documentation
|
||||
make -C docs --makefile=Makefile html
|
||||
@make -C docs --makefile=Makefile html
|
||||
|
||||
build: ## Build package
|
||||
rm -fr dist/*
|
||||
rm -fr build/*
|
||||
python setup.py sdist bdist_wheel
|
||||
@rm -fr dist/*
|
||||
@rm -fr build/*
|
||||
@hatch build
|
||||
|
||||
doc-clean: ## Update documentation
|
||||
make -C docs --makefile=Makefile clean
|
||||
doc-clean: ## Clean documentation folders
|
||||
@make -C docs --makefile=Makefile clean
|
||||
|
||||
help: ## Show help message
|
||||
help: ## Show this help message
|
||||
@IFS=$$'\n' ; \
|
||||
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||
printf "%s\n\n" "Usage: make [task]"; \
|
||||
|
@ -1,10 +1,11 @@
|
||||
# Odte
|
||||
|
||||

|
||||
[](https://github.com/Doctorado-ML/Odte/actions/workflows/codeql-analysis.yml)
|
||||
[](https://codecov.io/gh/Doctorado-ML/odte)
|
||||
[](https://www.codacy.com/gh/Doctorado-ML/Odte/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/Odte&utm_campaign=Badge_Grade)
|
||||
[](https://badge.fury.io/py/Odte)
|
||||

|
||||

|
||||
[](https://zenodo.org/badge/latestdoi/271595804)
|
||||
|
||||
# Odte
|
||||
|
||||
Oblique Decision Tree Ensemble
|
||||
Oblique Decision Tree Ensemble classifier based on [STree](https://github.com/doctorado-ml/stree) nodes.
|
||||
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
4
docs/requirements.txt
Normal file
4
docs/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
||||
sphinx
|
||||
sphinx-rtd-theme
|
||||
myst-parser
|
||||
stree
|
10
docs/source/api/Odte.rst
Normal file
10
docs/source/api/Odte.rst
Normal file
@ -0,0 +1,10 @@
|
||||
Odte
|
||||
=====
|
||||
|
||||
.. automodule:: odte
|
||||
.. autoclass:: Odte
|
||||
:members:
|
||||
:undoc-members:
|
||||
:private-members:
|
||||
:show-inheritance:
|
||||
:noindex:
|
8
docs/source/api/index.rst
Normal file
8
docs/source/api/index.rst
Normal file
@ -0,0 +1,8 @@
|
||||
API index
|
||||
=========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
Odte
|
54
docs/source/conf.py
Normal file
54
docs/source/conf.py
Normal file
@ -0,0 +1,54 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Path setup --------------------------------------------------------------
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
import odte
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = "Odte"
|
||||
copyright = "2024 Ricardo Montañana Gómez"
|
||||
author = "Ricardo Montañana Gómez"
|
||||
|
||||
# The full version, including alpha/beta/rc tags
|
||||
version = release = odte.__version__
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = ["myst_parser", "sphinx.ext.autodoc", "sphinx.ext.viewcode"]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["_templates"]
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = []
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = []
|
BIN
docs/source/example.png
Normal file
BIN
docs/source/example.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.1 MiB |
13
docs/source/hyperparameters.md
Normal file
13
docs/source/hyperparameters.md
Normal file
@ -0,0 +1,13 @@
|
||||
# Hyperparameters
|
||||
|
||||
| | **Hyperparameter** | **Type/Values** | **Default** | |
|
||||
| --- | ------------------- | -------------------------------------------------------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| | estimator | \<sklearn.BaseEstimator\> | Stree() | Base estimator used to build each element of the ensemble. |
|
||||
| | n_jobs | \<int\> | -1 | Specifies the number of threads used to build the ensemble (-1 uses all available cores) |
|
||||
| | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.<br>Pass an int for reproducible output across multiple function calls |
|
||||
| | max_features | \<int\>, \<float\> <br><br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider in each tree:<br>\<int\> _max_features_ features for each tree.<br>\<float\> _max_features_ is a fraction and int(_max_features_ \* _n_features_) features are considered for each tree.<br>“auto” _max_features_=sqrt(_n_features_)<br>“sqrt” _max_features_=sqrt(_n_features_)<br>“log2” _max_features_=log2(_n_features_)<br>_None_ _max_features_=_n_features_ |
|
||||
| | max_samples | \<int\>, \<float\> | None |The number of samples to consider for bootstrap:<br>\<int\> _max_samples_ samples for each tree.<br>\<float\> _max_samples_ is a fraction and int(_max_samples_ \* _n_samples_) samples for each tree. |
|
||||
| | n_estimators | \<int\> | 100 | The number of trees the ensemble is going to build |
|
||||
| | be_hyperparams | \<str\> | "{}" | Hyperparameters passed to the base estimator, e.g. "{\\"C\\": 17, \\"kernel\\": \\"rbf\\"}"|
|
||||
|
||||
|
14
docs/source/index.rst
Normal file
14
docs/source/index.rst
Normal file
@ -0,0 +1,14 @@
|
||||
Welcome to Odte's documentation!
|
||||
=================================
|
||||
|
||||
.. toctree::
|
||||
:caption: Contents:
|
||||
:titlesonly:
|
||||
|
||||
|
||||
odte
|
||||
install
|
||||
hyperparameters
|
||||
api/index
|
||||
|
||||
* :ref:`genindex`
|
15
docs/source/install.rst
Normal file
15
docs/source/install.rst
Normal file
@ -0,0 +1,15 @@
|
||||
Install
|
||||
=======
|
||||
|
||||
The main stable release
|
||||
|
||||
``pip install odte``
|
||||
|
||||
or the latest development branch
|
||||
|
||||
``pip install git+https://github.com/doctorado-ml/odte``
|
||||
|
||||
Tests
|
||||
*****
|
||||
|
||||
``python -m unittest -v odte.tests``
|
17
docs/source/odte.md
Normal file
17
docs/source/odte.md
Normal file
@ -0,0 +1,17 @@
|
||||
# Odte
|
||||
|
||||

|
||||
[](https://github.com/Doctorado-ML/Odte/actions/workflows/codeql-analysis.yml)
|
||||
[](https://codecov.io/gh/Doctorado-ML/odte)
|
||||
[](https://www.codacy.com/gh/Doctorado-ML/Odte/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/Odte&utm_campaign=Badge_Grade)
|
||||
[](https://badge.fury.io/py/Odte)
|
||||

|
||||
[](https://zenodo.org/badge/latestdoi/271595804)
|
||||
|
||||
Oblique Decision Tree Ensemble classifier based on [STree](https://github.com/doctorado-ml/stree) nodes.
|
||||
|
||||

|
||||
|
||||
## License
|
||||
|
||||
Odte is [MIT](https://github.com/doctorado-ml/odte/blob/master/LICENSE) licensed
|
@ -1,388 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Compare Odte with different estimators"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Uncomment the next cell if Odte is not already installed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#\n",
|
||||
"# Google Colab setup\n",
|
||||
"#\n",
|
||||
"#!pip install git+https://github.com/doctorado-ml/odte\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import datetime, time\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn import tree\n",
|
||||
"from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
|
||||
"from stree import Stree\n",
|
||||
"from odte import Odte"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"if not os.path.isfile('data/creditcard.csv'):\n",
|
||||
" !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
|
||||
" !tar xzf creditcard.tgz"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load dataset and normalize values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load Dataset\n",
|
||||
"df = pd.read_csv('data/creditcard.csv')\n",
|
||||
"df.shape\n",
|
||||
"random_state = 2020"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
|
||||
"print(\"Valid: {0:.3f}% {1:,}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Normalize Amount\n",
|
||||
"from sklearn.preprocessing import RobustScaler\n",
|
||||
"values = RobustScaler().fit_transform(df.Amount.values.reshape(-1, 1))\n",
|
||||
"df['Amount_Scaled'] = values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Remove unneeded features\n",
|
||||
"y = df.Class.values\n",
|
||||
"X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
|
||||
"print(f\"X shape: {X.shape}\\ny shape: {y.shape}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build the models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Divide dataset\n",
|
||||
"train_size = .7\n",
|
||||
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=train_size, shuffle=True, random_state=random_state, stratify=y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Linear Tree\n",
|
||||
"linear_tree = tree.DecisionTreeClassifier(random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Random Forest\n",
|
||||
"random_forest = RandomForestClassifier(random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Stree\n",
|
||||
"stree = Stree(random_state=random_state, C=.01)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# AdaBoost\n",
|
||||
"adaboost = AdaBoostClassifier(random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Gradient Boosting\n",
|
||||
"gradient = GradientBoostingClassifier(random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Oblique Decision Tree Ensemble\n",
|
||||
"odte = Odte(random_state=random_state, max_features=\"auto\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Do the test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def try_model(name, model):\n",
|
||||
" print(f\"************************** {name} **********************\")\n",
|
||||
" now = time.time()\n",
|
||||
" model.fit(Xtrain, ytrain)\n",
|
||||
" spent = time.time() - now\n",
|
||||
" print(f\"Train Model {name} took: {spent:.4} seconds\")\n",
|
||||
" predict = model.predict(Xtrain)\n",
|
||||
" predictt = model.predict(Xtest)\n",
|
||||
" print(f\"=========== {name} - Train {Xtrain.shape[0]:,} samples =============\",)\n",
|
||||
" print(classification_report(ytrain, predict, digits=6))\n",
|
||||
" print(f\"=========== {name} - Test {Xtest.shape[0]:,} samples =============\")\n",
|
||||
" print(classification_report(ytest, predictt, digits=6))\n",
|
||||
" print(\"Confusion Matrix in Train\")\n",
|
||||
" print(confusion_matrix(ytrain, predict))\n",
|
||||
" print(\"Confusion Matrix in Test\")\n",
|
||||
" print(confusion_matrix(ytest, predictt))\n",
|
||||
" return f1_score(ytest, predictt), spent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Train & Test models\n",
|
||||
"models = {\n",
|
||||
" 'Linear Tree':linear_tree, 'Random Forest': random_forest, 'Stree (SVM Tree)': stree, \n",
|
||||
" 'AdaBoost model': adaboost, 'Odte model': odte #'Gradient Boost.': gradient\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"best_f1 = 0\n",
|
||||
"outcomes = []\n",
|
||||
"for name, model in models.items():\n",
|
||||
" f1, time_spent = try_model(name, model)\n",
|
||||
" outcomes.append((name, f1, time_spent))\n",
|
||||
" if f1 > best_f1:\n",
|
||||
" best_model = name\n",
|
||||
" best_time = time_spent\n",
|
||||
" best_f1 = f1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"*\"*110)\n",
|
||||
"print(f\"*The best f1 model is {best_model}, with a f1 score: {best_f1:.4} in {best_time:.6} seconds with {train_size:,} samples in train dataset\")\n",
|
||||
"print(\"*\"*110)\n",
|
||||
"for name, f1, time_spent in outcomes:\n",
|
||||
" print(f\"Model: {name}\\t Time: {time_spent:6.2f} seconds\\t f1: {f1:.4}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**************************************************************************************************************\n",
|
||||
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 152.54 seconds with 0.7 samples in train dataset\n",
|
||||
"**************************************************************************************************************\n",
|
||||
"Model: Linear Tree\t Time: 13.52 seconds\t f1: 0.7645\n",
|
||||
"Model: Random Forest\t Time: 152.54 seconds\t f1: 0.8815\n",
|
||||
"Model: Stree (SVM Tree)\t Time: 32.55 seconds\t f1: 0.8603\n",
|
||||
"Model: AdaBoost model\t Time: 47.34 seconds\t f1: 0.7509\n",
|
||||
"Model: Gradient Boost.\t Time: 244.12 seconds\t f1: 0.5259"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"******************************************************************************************************************\n",
|
||||
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 218.966 seconds with 0.7 samples in train dataset\n",
|
||||
"******************************************************************************************************************\n",
|
||||
"Model: Linear Tree Time: 23.05 seconds\t f1: 0.7645\n",
|
||||
"Model: Random Forest\t Time: 218.97 seconds\t f1: 0.8815\n",
|
||||
"Model: Stree (SVM Tree)\t Time: 49.45 seconds\t f1: 0.8467\n",
|
||||
"Model: AdaBoost model\t Time: 73.83 seconds\t f1: 0.7509\n",
|
||||
"Model: Gradient Boost.\t Time: 388.69 seconds\t f1: 0.5259\n",
|
||||
"Model: Neural Network\t Time: 25.47 seconds\t f1: 0.8328\n",
|
||||
"Model: Odte \t Time:2134.25 seconds\t f1: 0.8385\n",
|
||||
"```"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"hide_input": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.7.6 64-bit ('general': venv)",
|
||||
"language": "python",
|
||||
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6-final"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": 1,
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "Table of Contents",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": false,
|
||||
"toc_position": {},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": false
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "392px",
|
||||
"left": "1518px",
|
||||
"right": "20px",
|
||||
"top": "40px",
|
||||
"width": "392px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": true
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -1,174 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import datetime, time\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.model_selection import train_test_split, cross_validate\n",
|
||||
"from sklearn import tree\n",
|
||||
"from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
|
||||
"from stree import Stree\n",
|
||||
"from odte import Odte\n",
|
||||
"\n",
|
||||
"random_state = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_wine\n",
|
||||
"X, y = load_wine(return_X_y=True)\n",
|
||||
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_estimators = 20\n",
|
||||
"clf = {}\n",
|
||||
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
|
||||
"clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
|
||||
"clf[\"odte\"] = Odte(n_jobs=-1, base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
|
||||
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"*\"*30,\"Results for wine\", \"*\"*30)\n",
|
||||
"for clf_type, item in clf.items():\n",
|
||||
" print(f\"Training {clf_type}...\")\n",
|
||||
" now = time.time()\n",
|
||||
" item.fit(Xtrain, ytrain)\n",
|
||||
" print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"X, y = load_iris(return_X_y=True)\n",
|
||||
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_estimators = 10\n",
|
||||
"clf = {}\n",
|
||||
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
|
||||
"clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
|
||||
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"*\"*30,\"Results for iris\", \"*\"*30)\n",
|
||||
"for clf_type, item in clf.items():\n",
|
||||
" print(f\"Training {clf_type}...\")\n",
|
||||
" now = time.time()\n",
|
||||
" item.fit(Xtrain, ytrain)\n",
|
||||
" print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
|
||||
"print(cross)\n",
|
||||
"print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
|
||||
"print(cross)\n",
|
||||
"print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.utils.estimator_checks import check_estimator\n",
|
||||
"# Make checks one by one\n",
|
||||
"c = 0\n",
|
||||
"checks = check_estimator(Odte(), generate_only=True)\n",
|
||||
"for check in checks:\n",
|
||||
" c += 1\n",
|
||||
" print(c, check[1])\n",
|
||||
" check[1](check[0])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "da86226729227d0e8962a5ec29ea906307507ca2c30ceaaf651c09a617630939"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.2 64-bit ('general': venv)",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
},
|
||||
"orig_nbformat": 2
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
63
odte/Odte.py
63
odte/Odte.py
@ -5,6 +5,7 @@ __license__ = "MIT"
|
||||
Build a forest of oblique trees based on STree, admits any base classifier
|
||||
as well
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import random
|
||||
import json
|
||||
@ -15,7 +16,7 @@ from sklearn.utils.multiclass import ( # type: ignore
|
||||
check_classification_targets,
|
||||
)
|
||||
from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils import check_random_state # type: ignore
|
||||
from sklearn.ensemble import BaseEnsemble # type: ignore
|
||||
from sklearn.utils.validation import ( # type: ignore
|
||||
check_is_fitted,
|
||||
@ -30,19 +31,19 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
def __init__(
|
||||
self,
|
||||
# n_jobs = -1 to use all available cores
|
||||
n_jobs: int = 1,
|
||||
base_estimator: BaseEstimator = None,
|
||||
random_state: int = 0,
|
||||
n_jobs: int = -1,
|
||||
estimator: BaseEstimator = Stree(),
|
||||
random_state: Optional[int] = None,
|
||||
max_features: Optional[Union[str, int, float]] = None,
|
||||
max_samples: Optional[Union[int, float]] = None,
|
||||
n_estimators: int = 100,
|
||||
be_hyperparams: str = "{}",
|
||||
):
|
||||
super().__init__(
|
||||
base_estimator=base_estimator,
|
||||
estimator=estimator,
|
||||
n_estimators=n_estimators,
|
||||
)
|
||||
self.base_estimator = base_estimator
|
||||
self.estimator = estimator
|
||||
self.n_jobs = n_jobs
|
||||
self.n_estimators = n_estimators
|
||||
self.random_state = random_state
|
||||
@ -55,13 +56,16 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
return __version__
|
||||
|
||||
def _validate_estimator(self) -> None:
|
||||
"""Check the estimator and set the base_estimator_ attribute."""
|
||||
"""Check the estimator and set the estimator_ attribute."""
|
||||
super()._validate_estimator(
|
||||
default=Stree(random_state=self.random_state)
|
||||
)
|
||||
|
||||
def fit(
|
||||
self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
|
||||
self,
|
||||
X: np.ndarray,
|
||||
y: np.ndarray,
|
||||
sample_weight: Optional[np.ndarray] = None,
|
||||
) -> Odte:
|
||||
# Check parameters are Ok.
|
||||
if self.n_estimators < 3:
|
||||
@ -79,7 +83,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
# Initialize computed parameters
|
||||
# Build the estimator
|
||||
self.max_features_ = self._initialize_max_features()
|
||||
# build base_estimator_
|
||||
# build estimator_
|
||||
self._validate_estimator()
|
||||
self.classes_, y = np.unique(y, return_inverse=True)
|
||||
self.n_classes_: int = self.classes_.shape[0]
|
||||
@ -91,7 +95,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
return self
|
||||
|
||||
def _compute_metrics(self) -> None:
|
||||
tdepth = tnodes = tleaves = 0.0
|
||||
tdepth = tnodes = tleaves = 0
|
||||
for estimator in self.estimators_:
|
||||
if hasattr(estimator, "nodes_leaves"):
|
||||
nodes, leaves = estimator.nodes_leaves()
|
||||
@ -99,16 +103,21 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
tdepth += depth
|
||||
tnodes += nodes
|
||||
tleaves += leaves
|
||||
self.depth_ = tdepth / self.n_estimators
|
||||
self.leaves_ = tleaves / self.n_estimators
|
||||
self.nodes_ = tnodes / self.n_estimators
|
||||
self.depth_ = tdepth
|
||||
self.leaves_ = tleaves
|
||||
self.nodes_ = tnodes
|
||||
|
||||
def _train(
|
||||
self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
|
||||
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
|
||||
n_samples = X.shape[0]
|
||||
boot_samples = self._get_bootstrap_n_samples(n_samples)
|
||||
estimator = clone(self.base_estimator_)
|
||||
estimator = clone(self.estimator_)
|
||||
defined_state = (
|
||||
random.randint(0, 2**31)
|
||||
if self.random_state is None
|
||||
else self.random_state
|
||||
)
|
||||
return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore
|
||||
delayed(Odte._parallel_build_tree)(
|
||||
estimator,
|
||||
@ -121,13 +130,13 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
self.be_hyperparams,
|
||||
)
|
||||
for random_seed in range(
|
||||
self.random_state, self.random_state + self.n_estimators
|
||||
defined_state, defined_state + self.n_estimators
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _parallel_build_tree(
|
||||
base_estimator_: BaseEstimator,
|
||||
estimator_: BaseEstimator,
|
||||
X: np.ndarray,
|
||||
y: np.ndarray,
|
||||
weights: np.ndarray,
|
||||
@ -136,13 +145,15 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
max_features: int,
|
||||
hyperparams: str,
|
||||
) -> Tuple[BaseEstimator, Tuple[int, ...]]:
|
||||
clf = clone(base_estimator_)
|
||||
clf = clone(estimator_)
|
||||
hyperparams_ = json.loads(hyperparams)
|
||||
hyperparams_.update(dict(random_state=random_seed))
|
||||
clf.set_params(**hyperparams_)
|
||||
n_samples = X.shape[0]
|
||||
# bootstrap
|
||||
# initialize random boxes
|
||||
random.seed(random_seed)
|
||||
random_box = check_random_state(random_seed)
|
||||
# bootstrap
|
||||
indices = random_box.randint(0, n_samples, boot_samples)
|
||||
# update weights with the chosen samples
|
||||
weights_update = np.bincount(indices, minlength=n_samples)
|
||||
@ -249,6 +260,18 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
result[i, predictions[i]] += 1
|
||||
return result / self.n_estimators
|
||||
|
||||
def nodes_leaves(self) -> Tuple[float, float]:
|
||||
def get_nodes(self) -> int:
|
||||
check_is_fitted(self, "estimators_")
|
||||
return self.nodes_, self.leaves_
|
||||
return self.nodes_
|
||||
|
||||
def get_leaves(self) -> int:
|
||||
check_is_fitted(self, "estimators_")
|
||||
return self.leaves_
|
||||
|
||||
def get_depth(self) -> int:
|
||||
check_is_fitted(self, "estimators_")
|
||||
return self.depth_
|
||||
|
||||
def nodes_leaves(self) -> Tuple[int, int]:
|
||||
check_is_fitted(self, "estimators_")
|
||||
return (self.get_nodes(), self.get_leaves())
|
||||
|
@ -1,3 +1,4 @@
|
||||
from ._version import __version__
|
||||
from .Odte import Odte
|
||||
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
@ -5,4 +6,4 @@ __copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"
|
||||
__license__ = "MIT License"
|
||||
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
||||
|
||||
__all__ = ["Odte"]
|
||||
__all__ = ["__version__", "Odte"]
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.3.2"
|
||||
__version__ = "1.0.0-1"
|
||||
|
@ -1,7 +1,6 @@
|
||||
# type: ignore
|
||||
import unittest
|
||||
import os
|
||||
import random
|
||||
import warnings
|
||||
import json
|
||||
from sklearn.exceptions import ConvergenceWarning, NotFittedError
|
||||
@ -46,7 +45,10 @@ class Odte_test(unittest.TestCase):
|
||||
)
|
||||
for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]:
|
||||
tclf = Odte(
|
||||
random_state=self._random_state, max_features=max_features
|
||||
random_state=self._random_state,
|
||||
max_features=max_features,
|
||||
n_jobs=1,
|
||||
n_estimators=100,
|
||||
)
|
||||
tclf.fit(X, y)
|
||||
computed = tclf._get_random_subspace(X, y, tclf.max_features_)
|
||||
@ -75,15 +77,15 @@ class Odte_test(unittest.TestCase):
|
||||
X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
|
||||
expected = [0, 1, 1, 2]
|
||||
tclf = Odte(
|
||||
base_estimator=Stree(),
|
||||
estimator=Stree(),
|
||||
random_state=self._random_state,
|
||||
n_estimators=10,
|
||||
n_jobs=-1,
|
||||
)
|
||||
tclf.set_params(
|
||||
**dict(
|
||||
base_estimator__kernel="rbf",
|
||||
base_estimator__random_state=self._random_state,
|
||||
estimator__kernel="rbf",
|
||||
estimator__random_state=self._random_state,
|
||||
)
|
||||
)
|
||||
computed = tclf.fit(X, y).predict(X)
|
||||
@ -95,14 +97,15 @@ class Odte_test(unittest.TestCase):
|
||||
X, y = load_dataset(self._random_state)
|
||||
expected = y
|
||||
tclf = Odte(
|
||||
base_estimator=Stree(),
|
||||
estimator=Stree(),
|
||||
random_state=self._random_state,
|
||||
max_features=1.0,
|
||||
max_samples=0.1,
|
||||
n_estimators=100,
|
||||
)
|
||||
tclf.set_params(
|
||||
**dict(
|
||||
base_estimator__kernel="linear",
|
||||
estimator__kernel="linear",
|
||||
)
|
||||
)
|
||||
computed = tclf.fit(X, y).predict(X)
|
||||
@ -135,7 +138,6 @@ class Odte_test(unittest.TestCase):
|
||||
0.97, # iwss None
|
||||
0.97, # cfs None
|
||||
]
|
||||
random.seed(self._random_state)
|
||||
for max_features in ["auto", None]:
|
||||
for splitter in [
|
||||
"best",
|
||||
@ -146,21 +148,22 @@ class Odte_test(unittest.TestCase):
|
||||
"cfs",
|
||||
]:
|
||||
tclf = Odte(
|
||||
base_estimator=Stree(),
|
||||
estimator=Stree(),
|
||||
random_state=self._random_state,
|
||||
n_estimators=3,
|
||||
n_jobs=1,
|
||||
)
|
||||
tclf.set_params(
|
||||
**dict(
|
||||
base_estimator__max_features=max_features,
|
||||
base_estimator__splitter=splitter,
|
||||
base_estimator__random_state=self._random_state,
|
||||
estimator__max_features=max_features,
|
||||
estimator__splitter=splitter,
|
||||
estimator__random_state=self._random_state,
|
||||
)
|
||||
)
|
||||
expected = results.pop(0)
|
||||
computed = tclf.fit(X, y).score(X, y)
|
||||
# print(computed, splitter, max_features)
|
||||
self.assertAlmostEqual(expected, computed)
|
||||
self.assertAlmostEqual(expected, computed, msg=splitter)
|
||||
|
||||
def test_generate_subspaces(self):
|
||||
features = 250
|
||||
@ -177,26 +180,32 @@ class Odte_test(unittest.TestCase):
|
||||
warnings.filterwarnings("ignore", category=RuntimeWarning)
|
||||
from sklearn.utils.estimator_checks import check_estimator
|
||||
|
||||
check_estimator(Odte())
|
||||
check_estimator(Odte(n_estimators=10))
|
||||
|
||||
def test_nodes_leaves_not_fitted(self):
|
||||
tclf = Odte(
|
||||
base_estimator=Stree(),
|
||||
estimator=Stree(),
|
||||
random_state=self._random_state,
|
||||
n_estimators=3,
|
||||
)
|
||||
with self.assertRaises(NotFittedError):
|
||||
tclf.nodes_leaves()
|
||||
with self.assertRaises(NotFittedError):
|
||||
tclf.get_nodes()
|
||||
with self.assertRaises(NotFittedError):
|
||||
tclf.get_leaves()
|
||||
with self.assertRaises(NotFittedError):
|
||||
tclf.get_depth()
|
||||
|
||||
def test_nodes_leaves_depth(self):
|
||||
tclf = Odte(
|
||||
base_estimator=Stree(),
|
||||
estimator=Stree(),
|
||||
random_state=self._random_state,
|
||||
n_estimators=5,
|
||||
n_jobs=1,
|
||||
)
|
||||
tclf_p = Odte(
|
||||
base_estimator=Stree(),
|
||||
estimator=Stree(),
|
||||
random_state=self._random_state,
|
||||
n_estimators=5,
|
||||
n_jobs=-1,
|
||||
@ -205,28 +214,37 @@ class Odte_test(unittest.TestCase):
|
||||
tclf.fit(X, y)
|
||||
tclf_p.fit(X, y)
|
||||
for clf in [tclf, tclf_p]:
|
||||
self.assertAlmostEqual(5.8, clf.depth_)
|
||||
self.assertAlmostEqual(9.4, clf.leaves_)
|
||||
self.assertAlmostEqual(17.8, clf.nodes_)
|
||||
self.assertEqual(29, clf.depth_)
|
||||
self.assertEqual(29, clf.get_depth())
|
||||
self.assertEqual(47, clf.leaves_)
|
||||
self.assertEqual(47, clf.get_leaves())
|
||||
self.assertEqual(89, clf.nodes_)
|
||||
self.assertEqual(89, clf.get_nodes())
|
||||
nodes, leaves = clf.nodes_leaves()
|
||||
self.assertAlmostEqual(9.4, leaves)
|
||||
self.assertAlmostEqual(17.8, nodes)
|
||||
self.assertEqual(47, leaves)
|
||||
self.assertEqual(47, clf.get_leaves())
|
||||
self.assertEqual(89, nodes)
|
||||
self.assertEqual(89, clf.get_nodes())
|
||||
|
||||
def test_nodes_leaves_SVC(self):
|
||||
tclf = Odte(
|
||||
base_estimator=SVC(),
|
||||
estimator=SVC(),
|
||||
random_state=self._random_state,
|
||||
n_estimators=3,
|
||||
)
|
||||
X, y = load_dataset(self._random_state, n_features=16, n_samples=500)
|
||||
tclf.fit(X, y)
|
||||
self.assertAlmostEqual(0.0, tclf.leaves_)
|
||||
self.assertAlmostEqual(0.0, tclf.get_leaves())
|
||||
self.assertAlmostEqual(0.0, tclf.nodes_)
|
||||
self.assertAlmostEqual(0.0, tclf.get_nodes())
|
||||
nodes, leaves = tclf.nodes_leaves()
|
||||
self.assertAlmostEqual(0.0, leaves)
|
||||
self.assertAlmostEqual(0.0, tclf.get_leaves())
|
||||
self.assertAlmostEqual(0.0, nodes)
|
||||
self.assertAlmostEqual(0.0, tclf.get_nodes())
|
||||
|
||||
def test_base_estimator_hyperparams(self):
|
||||
def test_estimator_hyperparams(self):
|
||||
data = [
|
||||
(Stree(), {"max_features": 7, "max_depth": 2}),
|
||||
(SVC(), {"kernel": "linear", "cache_size": 100}),
|
||||
@ -234,7 +252,7 @@ class Odte_test(unittest.TestCase):
|
||||
for clf, hyperparams in data:
|
||||
hyperparams_ = json.dumps(hyperparams)
|
||||
tclf = Odte(
|
||||
base_estimator=clf,
|
||||
estimator=clf,
|
||||
random_state=self._random_state,
|
||||
n_estimators=3,
|
||||
be_hyperparams=hyperparams_,
|
||||
|
@ -1,4 +1,3 @@
|
||||
# type: ignore
|
||||
from .Odte_tests import Odte_test
|
||||
|
||||
__all__ = ["Odte_test"]
|
||||
|
@ -1,5 +1,65 @@
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "Odte"
|
||||
description = "Oblique decision tree Ensemble."
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
authors = [
|
||||
{ name = "Ricardo Montañana", email = "ricardo.montanana@alu.uclm.es" },
|
||||
]
|
||||
dynamic = ['version']
|
||||
dependencies = ["stree>=1.4"]
|
||||
requires-python = ">=3.11"
|
||||
keywords = [
|
||||
"scikit-learn",
|
||||
"oblique-classifier",
|
||||
"oblique-decision-tree",
|
||||
"decision-tree",
|
||||
"ensemble",
|
||||
"svm",
|
||||
]
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Intended Audience :: Developers",
|
||||
"Topic :: Software Development",
|
||||
"Topic :: Scientific/Engineering",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Natural Language :: English",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["black", "flake8", "coverage", "mypy", "pandas", "hatch", "pip-audit"]
|
||||
doc = ["sphinx", "myst-parser", "sphinx_rtd_theme", "sphinx-autodoc-typehints"]
|
||||
|
||||
[project.urls]
|
||||
Home = "https://github.com/doctorado-ml/odte"
|
||||
Docs = "https://odte.readthedocs.io"
|
||||
|
||||
[tool.hatch.version]
|
||||
path = "odte/_version.py"
|
||||
|
||||
[tool.mypy]
|
||||
exclude = ['tests']
|
||||
|
||||
[tool.coverage.run]
|
||||
branch = true
|
||||
source = ["odte"]
|
||||
command_line = "-m unittest discover -s odte.tests"
|
||||
|
||||
[tool.coverage.report]
|
||||
show_missing = true
|
||||
fail_under = 100
|
||||
|
||||
[tool.black]
|
||||
line-length = 79
|
||||
target_version = ['py311']
|
||||
include = '\.pyi?$'
|
||||
exclude = '''
|
||||
/(
|
||||
|
@ -1 +1,2 @@
|
||||
stree>1.2.2
|
||||
scikit-learn==1.5.2
|
||||
stree>=1.4
|
||||
|
48
setup.py
48
setup.py
@ -1,48 +0,0 @@
|
||||
import setuptools
|
||||
import os
|
||||
|
||||
|
||||
def readme():
|
||||
with open("README.md") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
def get_data(field):
|
||||
item = ""
|
||||
file_name = "_version.py" if field == "version" else "__init__.py"
|
||||
with open(os.path.join("odte", file_name)) as f:
|
||||
for line in f.readlines():
|
||||
if line.startswith(f"__{field}__"):
|
||||
delim = '"' if '"' in line else "'"
|
||||
item = line.split(delim)[1]
|
||||
break
|
||||
else:
|
||||
raise RuntimeError(f"Unable to find {field} string.")
|
||||
return item
|
||||
|
||||
|
||||
setuptools.setup(
|
||||
name="Odte",
|
||||
version=get_data("version"),
|
||||
license=get_data("license"),
|
||||
description="Oblique decision tree Ensemble",
|
||||
long_description=readme(),
|
||||
long_description_content_type="text/markdown",
|
||||
packages=setuptools.find_packages(),
|
||||
url="https://github.com/doctorado-ml/odte",
|
||||
author=get_data("author"),
|
||||
author_email=get_data("author_email"),
|
||||
keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
|
||||
tree ensemble svm svc",
|
||||
classifiers=[
|
||||
"Development Status :: 4 - Beta",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Natural Language :: English",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Intended Audience :: Science/Research",
|
||||
],
|
||||
install_requires=["stree"],
|
||||
test_suite="odte.tests",
|
||||
zip_safe=False,
|
||||
)
|
Loading…
x
Reference in New Issue
Block a user