Compare commits

...

25 Commits

Author SHA1 Message Date
0f4ea8542e
Merge branch 'master' of github.com:Doctorado-ML/odte 2025-05-20 11:42:39 +02:00
0fa37dfdf3
Fix scikit-learn version in requirements for tests 2025-05-20 11:42:00 +02:00
582ff44cb1
Update return types in nodes_leaves 2025-04-09 11:19:37 +02:00
66fe0bbe48
Add build 1 to version 2024-09-03 18:15:04 +02:00
1d906e2cb3
Fix doc link in project 2024-09-03 18:08:56 +02:00
Ricardo Montañana Gómez
31a3753046
Merge pull request #9 from Doctorado-ML/v1.0
Update doc config
2024-08-16 18:57:53 +02:00
3c0f03d27e
Add hyperparameters.md to doc 2024-08-16 16:33:28 +02:00
d644031f3f
Add hyperparameters.md to doc 2024-08-16 16:33:04 +02:00
1790f7fee7
Update readme 2024-08-15 13:47:30 +02:00
624f79af0d
Update readthedocs config 2024-08-15 13:30:19 +02:00
40031b7d52
Update readme 2024-08-15 13:23:20 +02:00
bcc763e656
Update doc config
Change build tool to hatch
2024-08-15 13:16:51 +02:00
b19264b1eb
Set default estimator to STree 2024-07-28 18:30:17 +02:00
02e75b3c3e
Fix depth/leaves/nodes no longer return average 2023-11-27 13:53:15 +01:00
52d1095161
Add separate methods to return nodes/leaves/depth 2023-11-27 10:33:47 +01:00
f9b83adfee
Change default n_estimators to 10 instead of 100 2023-10-10 09:34:43 +02:00
Ricardo Montañana Gómez
382a420791
ci: ⬆️ Upgrade github actions for codeql and README to add badge 2023-01-15 01:58:19 +01:00
Ricardo Montañana Gómez
7aa4156e51
ci: ⬆️ Update github action setup-python 2023-01-15 01:50:41 +01:00
Ricardo Montañana Gómez
0df2f243a5
ci: ⬆️ Upgrade github actions 2023-01-15 01:45:39 +01:00
Ricardo Montañana Gómez
d3ceb3ce46
refactor: 🔖 Update requirements and version info 2023-01-15 01:32:26 +01:00
Ricardo Montañana Gómez
cabf926eb1
Update to scikit-learn 1.2 2023-01-14 21:38:11 +01:00
Ricardo Montañana Gómez
7300bd66db
Merge pull request #8 from Doctorado-ML/fix_python_random_init
Fix python random init
2022-04-29 10:22:33 +02:00
114f53d5e8
Update version file 2022-04-29 10:07:05 +02:00
267a17a708
Remove unneeded Random module from tests
Update pre-commit config
2022-04-20 11:25:45 +02:00
e01ca43cf9
Fix python random init 2022-03-10 13:17:56 +01:00
27 changed files with 381 additions and 727 deletions

View File

@ -7,7 +7,7 @@ on:
# The branches below must be a subset of the branches above # The branches below must be a subset of the branches above
branches: [master] branches: [master]
schedule: schedule:
- cron: '16 17 * * 3' - cron: "16 17 * * 3"
jobs: jobs:
analyze: analyze:
@ -17,7 +17,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
language: [ 'python' ] language: ["python"]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
# Learn more: # Learn more:
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
@ -28,7 +28,7 @@ jobs:
# Initializes the CodeQL tools for scanning. # Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL - name: Initialize CodeQL
uses: github/codeql-action/init@v1 uses: github/codeql-action/init@v2
with: with:
languages: ${{ matrix.language }} languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file. # If you wish to specify custom queries, you can do so here or in a config file.
@ -39,7 +39,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below) # If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild - name: Autobuild
uses: github/codeql-action/autobuild@v1 uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell. # Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl # 📚 https://git.io/JvXDl
@ -53,4 +53,4 @@ jobs:
# make release # make release
- name: Perform CodeQL Analysis - name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1 uses: github/codeql-action/analyze@v2

View File

@ -13,12 +13,12 @@ jobs:
strategy: strategy:
matrix: matrix:
os: [macos-latest, ubuntu-latest, windows-latest] os: [macos-latest, ubuntu-latest, windows-latest]
python: [3.8, 3.9, "3.10"] python: [3.11, 3.12]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python }} - name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v2 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python }} python-version: ${{ matrix.python }}
- name: Install dependencies - name: Install dependencies
@ -35,7 +35,7 @@ jobs:
coverage run -m unittest -v odte.tests coverage run -m unittest -v odte.tests
coverage xml coverage xml
- name: Upload coverage to Codecov - name: Upload coverage to Codecov
uses: codecov/codecov-action@v1 uses: codecov/codecov-action@v4
with: with:
token: ${{ secrets.CODECOV_TOKEN }} token: ${{ secrets.CODECOV_TOKEN }}
files: ./coverage.xml files: ./coverage.xml

View File

@ -1,23 +1,23 @@
repos: repos:
- repo: https://github.com/ambv/black - repo: https://github.com/ambv/black
rev: 20.8b1 rev: 22.3.0
hooks: hooks:
- id: black - id: black
exclude: ".virtual_documents" exclude: ".virtual_documents"
language_version: python3.9 language_version: python3.9
- repo: https://gitlab.com/pycqa/flake8 - repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4 rev: 3.9.2
hooks: hooks:
- id: flake8 - id: flake8
exclude: ".virtual_documents" exclude: ".virtual_documents"
- repo: https://github.com/pre-commit/mirrors-mypy - repo: https://github.com/pre-commit/mirrors-mypy
rev: "v0.790" # Use the sha / tag you want to point at rev: "v0.942" # Use the sha / tag you want to point at
hooks: hooks:
- id: mypy - id: mypy
#args: [--strict, --ignore-missing-imports] #args: [--strict, --ignore-missing-imports]
exclude: odte/tests exclude: odte/tests
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0 rev: v4.2.0
hooks: hooks:
- id: trailing-whitespace - id: trailing-whitespace
- id: check-case-conflict - id: check-case-conflict

14
.readthedocs.yaml Normal file
View File

@ -0,0 +1,14 @@
version: 2
sphinx:
configuration: docs/source/conf.py
build:
os: ubuntu-22.04
tools:
python: "3.12"
python:
install:
- requirements: requirements.txt
- requirements: docs/requirements.txt

1
MANIFEST.in Normal file
View File

@ -0,0 +1 @@
include README.md LICENSE

View File

@ -1,44 +1,35 @@
SHELL := /bin/bash SHELL := /bin/bash
.DEFAULT_GOAL := help .DEFAULT_GOAL := help
.PHONY: coverage deps help lint push test doc build .PHONY: audit coverage help lint test doc doc-clean build
coverage: ## Run tests with coverage coverage: ## Run tests with coverage
coverage erase @coverage erase
coverage run -m unittest -v odte.tests @coverage run -m unittest -v odte.tests
coverage report -m @coverage report -m
deps: ## Install dependencies lint: ## Lint source files
pip install -r requirements.txt @black odte
@flake8 odte
devdeps: ## Install development dependencies @mypy odte
pip install black pip-audit flake8 mypy coverage
lint: ## Lint and static-check
black odte
flake8 odte
mypy odte --exclude tests
audit: ## Audit pip audit: ## Audit pip
pip-audit @pip-audit
push: ## Push code with tags
git push && git push --tags
test: ## Run tests test: ## Run tests
python -m unittest -v odte.tests @python -m unittest -v odte.tests
doc: ## Update documentation doc: ## Update documentation
make -C docs --makefile=Makefile html @make -C docs --makefile=Makefile html
build: ## Build package build: ## Build package
rm -fr dist/* @rm -fr dist/*
rm -fr build/* @rm -fr build/*
python setup.py sdist bdist_wheel @hatch build
doc-clean: ## Update documentation doc-clean: ## Clean documentation folders
make -C docs --makefile=Makefile clean @make -C docs --makefile=Makefile clean
help: ## Show help message help: ## Show this help message
@IFS=$$'\n' ; \ @IFS=$$'\n' ; \
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \ help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
printf "%s\n\n" "Usage: make [task]"; \ printf "%s\n\n" "Usage: make [task]"; \

View File

@ -1,10 +1,11 @@
# Odte
![CI](https://github.com/Doctorado-ML/Odte/workflows/CI/badge.svg) ![CI](https://github.com/Doctorado-ML/Odte/workflows/CI/badge.svg)
[![CodeQL](https://github.com/Doctorado-ML/Odte/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Doctorado-ML/Odte/actions/workflows/codeql-analysis.yml)
[![codecov](https://codecov.io/gh/Doctorado-ML/odte/branch/master/graph/badge.svg)](https://codecov.io/gh/Doctorado-ML/odte) [![codecov](https://codecov.io/gh/Doctorado-ML/odte/branch/master/graph/badge.svg)](https://codecov.io/gh/Doctorado-ML/odte)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/f4b5ef87584b4095b6e49aefbe594c82)](https://www.codacy.com/gh/Doctorado-ML/Odte/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/Odte&utm_campaign=Badge_Grade) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/f4b5ef87584b4095b6e49aefbe594c82)](https://www.codacy.com/gh/Doctorado-ML/Odte/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/Odte&utm_campaign=Badge_Grade)
[![PyPI version](https://badge.fury.io/py/Odte.svg)](https://badge.fury.io/py/Odte) [![PyPI version](https://badge.fury.io/py/Odte.svg)](https://badge.fury.io/py/Odte)
![https://img.shields.io/badge/python-3.8%2B-blue](https://img.shields.io/badge/python-3.8%2B-brightgreen) ![https://img.shields.io/badge/python-3.11%2B-blue](https://img.shields.io/badge/python-3.11%2B-brightgreen)
[![DOI](https://zenodo.org/badge/271595804.svg)](https://zenodo.org/badge/latestdoi/271595804) [![DOI](https://zenodo.org/badge/271595804.svg)](https://zenodo.org/badge/latestdoi/271595804)
# Odte Oblique Decision Tree Ensemble classifier based on [STree](https://github.com/doctorado-ml/stree) nodes.
Oblique Decision Tree Ensemble

20
docs/Makefile Normal file
View File

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

4
docs/requirements.txt Normal file
View File

@ -0,0 +1,4 @@
sphinx
sphinx-rtd-theme
myst-parser
stree

10
docs/source/api/Odte.rst Normal file
View File

@ -0,0 +1,10 @@
Odte
=====
.. automodule:: odte
.. autoclass:: Odte
:members:
:undoc-members:
:private-members:
:show-inheritance:
:noindex:

View File

@ -0,0 +1,8 @@
API index
=========
.. toctree::
:maxdepth: 2
:caption: Contents:
Odte

54
docs/source/conf.py Normal file
View File

@ -0,0 +1,54 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import odte
# -- Project information -----------------------------------------------------
project = "Odte"
copyright = "2024 Ricardo Montañana Gómez"
author = "Ricardo Montañana Gómez"
# The full version, including alpha/beta/rc tags
version = release = odte.__version__
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["myst_parser", "sphinx.ext.autodoc", "sphinx.ext.viewcode"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = []

BIN
docs/source/example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 MiB

View File

@ -0,0 +1,13 @@
# Hyperparameters
| | **Hyperparameter** | **Type/Values** | **Default** | |
| --- | ------------------- | -------------------------------------------------------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| | estimator | \<sklearn.BaseEstimator\> | Stree() | Base estimator used to build each element of the ensemble. |
| | n_jobs | \<int\> | -1 | Specifies the number of threads used to build the ensemble (-1 means all available cores are used) |
| | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.<br>Pass an int for reproducible output across multiple function calls |
| | max_features | \<int\>, \<float\> <br><br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider in each tree:<br>\<int\> _max_features_ features for each tree.<br>\<float\> _max_features_ is a fraction and int(_max_features_ \* _n_features_) features are considered for each tree.<br>“auto” _max_features_=sqrt(_n_features_)<br>“sqrt” _max_features_=sqrt(_n_features_)<br>“log2” _max_features_=log2(_n_features_)<br>_None_ _max_features_=_n_features_ |
| | max_samples | \<int\>, \<float\> | None |The number of samples to consider for bootstrap:<br>\<int\> _max_samples_ samples for each tree.<br>\<float\> _max_samples_ is a fraction and int(_max_samples_ \* _n_samples_) samples for each tree. |
| | n_estimators | \<int\> | 100 | The number of trees the ensemble is going to build |
| | be_hyperparams | \<str\> | "{}" | Hyperparameters passed to the base estimator, i.e. "{\\"C\\": 17, \\"kernel\\": \\"rbf\\"}"|

14
docs/source/index.rst Normal file
View File

@ -0,0 +1,14 @@
Welcome to Odte's documentation!
=================================
.. toctree::
:caption: Contents:
:titlesonly:
odte
install
hyperparameters
api/index
* :ref:`genindex`

15
docs/source/install.rst Normal file
View File

@ -0,0 +1,15 @@
Install
=======
The main stable release
``pip install odte``
or the last development branch
``pip install git+https://github.com/doctorado-ml/odte``
Tests
*****
``python -m unittest -v odte.tests``

17
docs/source/odte.md Normal file
View File

@ -0,0 +1,17 @@
# Odte
![CI](https://github.com/Doctorado-ML/Odte/workflows/CI/badge.svg)
[![CodeQL](https://github.com/Doctorado-ML/Odte/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Doctorado-ML/Odte/actions/workflows/codeql-analysis.yml)
[![codecov](https://codecov.io/gh/Doctorado-ML/odte/branch/master/graph/badge.svg)](https://codecov.io/gh/Doctorado-ML/odte)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/f4b5ef87584b4095b6e49aefbe594c82)](https://www.codacy.com/gh/Doctorado-ML/Odte/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/Odte&utm_campaign=Badge_Grade)
[![PyPI version](https://badge.fury.io/py/Odte.svg)](https://badge.fury.io/py/Odte)
![https://img.shields.io/badge/python-3.11%2B-blue](https://img.shields.io/badge/python-3.11%2B-brightgreen)
[![DOI](https://zenodo.org/badge/271595804.svg)](https://zenodo.org/badge/latestdoi/271595804)
Oblique Decision Tree Ensemble classifier based on [STree](https://github.com/doctorado-ml/stree) nodes.
![Odte](./example.png)
## License
Odte is [MIT](https://github.com/doctorado-ml/odte/blob/master/LICENSE) licensed

View File

@ -1,388 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare Odte with different estimators"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup\n",
"Uncomment the next cell if Odte is not already installed"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Google Colab setup\n",
"#\n",
"#!pip install git+https://github.com/doctorado-ml/odte\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import datetime, time\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import tree\n",
"from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
"from stree import Stree\n",
"from odte import Odte"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"if not os.path.isfile('data/creditcard.csv'):\n",
" !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
" !tar xzf creditcard.tgz"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tests"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load dataset and normalize values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load Dataset\n",
"df = pd.read_csv('data/creditcard.csv')\n",
"df.shape\n",
"random_state = 2020"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
"print(\"Valid: {0:.3f}% {1:,}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Normalize Amount\n",
"from sklearn.preprocessing import RobustScaler\n",
"values = RobustScaler().fit_transform(df.Amount.values.reshape(-1, 1))\n",
"df['Amount_Scaled'] = values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Remove unneeded features\n",
"y = df.Class.values\n",
"X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
"print(f\"X shape: {X.shape}\\ny shape: {y.shape}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build the models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Divide dataset\n",
"train_size = .7\n",
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=train_size, shuffle=True, random_state=random_state, stratify=y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Linear Tree\n",
"linear_tree = tree.DecisionTreeClassifier(random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Random Forest\n",
"random_forest = RandomForestClassifier(random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Stree\n",
"stree = Stree(random_state=random_state, C=.01)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# AdaBoost\n",
"adaboost = AdaBoostClassifier(random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gradient Boosting\n",
"gradient = GradientBoostingClassifier(random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Oblique Decision Tree Ensemble\n",
"odte = Odte(random_state=random_state, max_features=\"auto\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Do the test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def try_model(name, model):\n",
" print(f\"************************** {name} **********************\")\n",
" now = time.time()\n",
" model.fit(Xtrain, ytrain)\n",
" spent = time.time() - now\n",
" print(f\"Train Model {name} took: {spent:.4} seconds\")\n",
" predict = model.predict(Xtrain)\n",
" predictt = model.predict(Xtest)\n",
" print(f\"=========== {name} - Train {Xtrain.shape[0]:,} samples =============\",)\n",
" print(classification_report(ytrain, predict, digits=6))\n",
" print(f\"=========== {name} - Test {Xtest.shape[0]:,} samples =============\")\n",
" print(classification_report(ytest, predictt, digits=6))\n",
" print(\"Confusion Matrix in Train\")\n",
" print(confusion_matrix(ytrain, predict))\n",
" print(\"Confusion Matrix in Test\")\n",
" print(confusion_matrix(ytest, predictt))\n",
" return f1_score(ytest, predictt), spent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Train & Test models\n",
"models = {\n",
" 'Linear Tree':linear_tree, 'Random Forest': random_forest, 'Stree (SVM Tree)': stree, \n",
" 'AdaBoost model': adaboost, 'Odte model': odte #'Gradient Boost.': gradient\n",
"}\n",
"\n",
"best_f1 = 0\n",
"outcomes = []\n",
"for name, model in models.items():\n",
" f1, time_spent = try_model(name, model)\n",
" outcomes.append((name, f1, time_spent))\n",
" if f1 > best_f1:\n",
" best_model = name\n",
" best_time = time_spent\n",
" best_f1 = f1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(\"*\"*110)\n",
"print(f\"*The best f1 model is {best_model}, with a f1 score: {best_f1:.4} in {best_time:.6} seconds with {train_size:,} samples in train dataset\")\n",
"print(\"*\"*110)\n",
"for name, f1, time_spent in outcomes:\n",
" print(f\"Model: {name}\\t Time: {time_spent:6.2f} seconds\\t f1: {f1:.4}\")"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"**************************************************************************************************************\n",
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 152.54 seconds with 0.7 samples in train dataset\n",
"**************************************************************************************************************\n",
"Model: Linear Tree\t Time: 13.52 seconds\t f1: 0.7645\n",
"Model: Random Forest\t Time: 152.54 seconds\t f1: 0.8815\n",
"Model: Stree (SVM Tree)\t Time: 32.55 seconds\t f1: 0.8603\n",
"Model: AdaBoost model\t Time: 47.34 seconds\t f1: 0.7509\n",
"Model: Gradient Boost.\t Time: 244.12 seconds\t f1: 0.5259"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"******************************************************************************************************************\n",
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 218.966 seconds with 0.7 samples in train dataset\n",
"******************************************************************************************************************\n",
"Model: Linear Tree Time: 23.05 seconds\t f1: 0.7645\n",
"Model: Random Forest\t Time: 218.97 seconds\t f1: 0.8815\n",
"Model: Stree (SVM Tree)\t Time: 49.45 seconds\t f1: 0.8467\n",
"Model: AdaBoost model\t Time: 73.83 seconds\t f1: 0.7509\n",
"Model: Gradient Boost.\t Time: 388.69 seconds\t f1: 0.5259\n",
"Model: Neural Network\t Time: 25.47 seconds\t f1: 0.8328\n",
"Model: Odte \t Time:2134.25 seconds\t f1: 0.8385\n",
"```"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3.7.6 64-bit ('general': venv)",
"language": "python",
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6-final"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"position": {
"height": "392px",
"left": "1518px",
"right": "20px",
"top": "40px",
"width": "392px"
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -1,174 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import datetime, time\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split, cross_validate\n",
"from sklearn import tree\n",
"from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
"from stree import Stree\n",
"from odte import Odte\n",
"\n",
"random_state = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_wine\n",
"X, y = load_wine(return_X_y=True)\n",
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_estimators = 20\n",
"clf = {}\n",
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
"clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
"clf[\"odte\"] = Odte(n_jobs=-1, base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(\"*\"*30,\"Results for wine\", \"*\"*30)\n",
"for clf_type, item in clf.items():\n",
" print(f\"Training {clf_type}...\")\n",
" now = time.time()\n",
" item.fit(Xtrain, ytrain)\n",
" print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_iris\n",
"X, y = load_iris(return_X_y=True)\n",
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_estimators = 10\n",
"clf = {}\n",
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
"clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(\"*\"*30,\"Results for iris\", \"*\"*30)\n",
"for clf_type, item in clf.items():\n",
" print(f\"Training {clf_type}...\")\n",
" now = time.time()\n",
" item.fit(Xtrain, ytrain)\n",
" print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
"print(cross)\n",
"print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
"print(cross)\n",
"print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from sklearn.utils.estimator_checks import check_estimator\n",
"# Make checks one by one\n",
"c = 0\n",
"checks = check_estimator(Odte(), generate_only=True)\n",
"for check in checks:\n",
" c += 1\n",
" print(c, check[1])\n",
" check[1](check[0])"
]
}
],
"metadata": {
"interpreter": {
"hash": "da86226729227d0e8962a5ec29ea906307507ca2c30ceaaf651c09a617630939"
},
"kernelspec": {
"display_name": "Python 3.9.2 64-bit ('general': venv)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -5,6 +5,7 @@ __license__ = "MIT"
Build a forest of oblique trees based on STree, admits any base classifier Build a forest of oblique trees based on STree, admits any base classifier
as well as well
""" """
from __future__ import annotations from __future__ import annotations
import random import random
import json import json
@ -15,7 +16,7 @@ from sklearn.utils.multiclass import ( # type: ignore
check_classification_targets, check_classification_targets,
) )
from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore
from sklearn.utils import check_random_state from sklearn.utils import check_random_state # type: ignore
from sklearn.ensemble import BaseEnsemble # type: ignore from sklearn.ensemble import BaseEnsemble # type: ignore
from sklearn.utils.validation import ( # type: ignore from sklearn.utils.validation import ( # type: ignore
check_is_fitted, check_is_fitted,
@ -30,19 +31,19 @@ class Odte(BaseEnsemble, ClassifierMixin):
def __init__( def __init__(
self, self,
# n_jobs = -1 to use all available cores # n_jobs = -1 to use all available cores
n_jobs: int = 1, n_jobs: int = -1,
base_estimator: BaseEstimator = None, estimator: BaseEstimator = Stree(),
random_state: int = 0, random_state: Optional[int] = None,
max_features: Optional[Union[str, int, float]] = None, max_features: Optional[Union[str, int, float]] = None,
max_samples: Optional[Union[int, float]] = None, max_samples: Optional[Union[int, float]] = None,
n_estimators: int = 100, n_estimators: int = 100,
be_hyperparams: str = "{}", be_hyperparams: str = "{}",
): ):
super().__init__( super().__init__(
base_estimator=base_estimator, estimator=estimator,
n_estimators=n_estimators, n_estimators=n_estimators,
) )
self.base_estimator = base_estimator self.estimator = estimator
self.n_jobs = n_jobs self.n_jobs = n_jobs
self.n_estimators = n_estimators self.n_estimators = n_estimators
self.random_state = random_state self.random_state = random_state
@ -55,13 +56,16 @@ class Odte(BaseEnsemble, ClassifierMixin):
return __version__ return __version__
def _validate_estimator(self) -> None: def _validate_estimator(self) -> None:
"""Check the estimator and set the base_estimator_ attribute.""" """Check the estimator and set the estimator_ attribute."""
super()._validate_estimator( super()._validate_estimator(
default=Stree(random_state=self.random_state) default=Stree(random_state=self.random_state)
) )
def fit( def fit(
self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None self,
X: np.ndarray,
y: np.ndarray,
sample_weight: Optional[np.ndarray] = None,
) -> Odte: ) -> Odte:
# Check parameters are Ok. # Check parameters are Ok.
if self.n_estimators < 3: if self.n_estimators < 3:
@ -79,7 +83,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
# Initialize computed parameters # Initialize computed parameters
# Build the estimator # Build the estimator
self.max_features_ = self._initialize_max_features() self.max_features_ = self._initialize_max_features()
# build base_estimator_ # build estimator_
self._validate_estimator() self._validate_estimator()
self.classes_, y = np.unique(y, return_inverse=True) self.classes_, y = np.unique(y, return_inverse=True)
self.n_classes_: int = self.classes_.shape[0] self.n_classes_: int = self.classes_.shape[0]
@ -91,7 +95,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
return self return self
def _compute_metrics(self) -> None: def _compute_metrics(self) -> None:
tdepth = tnodes = tleaves = 0.0 tdepth = tnodes = tleaves = 0
for estimator in self.estimators_: for estimator in self.estimators_:
if hasattr(estimator, "nodes_leaves"): if hasattr(estimator, "nodes_leaves"):
nodes, leaves = estimator.nodes_leaves() nodes, leaves = estimator.nodes_leaves()
@ -99,16 +103,21 @@ class Odte(BaseEnsemble, ClassifierMixin):
tdepth += depth tdepth += depth
tnodes += nodes tnodes += nodes
tleaves += leaves tleaves += leaves
self.depth_ = tdepth / self.n_estimators self.depth_ = tdepth
self.leaves_ = tleaves / self.n_estimators self.leaves_ = tleaves
self.nodes_ = tnodes / self.n_estimators self.nodes_ = tnodes
def _train( def _train(
self, X: np.ndarray, y: np.ndarray, weights: np.ndarray self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]: ) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
n_samples = X.shape[0] n_samples = X.shape[0]
boot_samples = self._get_bootstrap_n_samples(n_samples) boot_samples = self._get_bootstrap_n_samples(n_samples)
estimator = clone(self.base_estimator_) estimator = clone(self.estimator_)
defined_state = (
random.randint(0, 2**31)
if self.random_state is None
else self.random_state
)
return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore
delayed(Odte._parallel_build_tree)( delayed(Odte._parallel_build_tree)(
estimator, estimator,
@ -121,13 +130,13 @@ class Odte(BaseEnsemble, ClassifierMixin):
self.be_hyperparams, self.be_hyperparams,
) )
for random_seed in range( for random_seed in range(
self.random_state, self.random_state + self.n_estimators defined_state, defined_state + self.n_estimators
) )
) )
@staticmethod @staticmethod
def _parallel_build_tree( def _parallel_build_tree(
base_estimator_: BaseEstimator, estimator_: BaseEstimator,
X: np.ndarray, X: np.ndarray,
y: np.ndarray, y: np.ndarray,
weights: np.ndarray, weights: np.ndarray,
@ -136,13 +145,15 @@ class Odte(BaseEnsemble, ClassifierMixin):
max_features: int, max_features: int,
hyperparams: str, hyperparams: str,
) -> Tuple[BaseEstimator, Tuple[int, ...]]: ) -> Tuple[BaseEstimator, Tuple[int, ...]]:
clf = clone(base_estimator_) clf = clone(estimator_)
hyperparams_ = json.loads(hyperparams) hyperparams_ = json.loads(hyperparams)
hyperparams_.update(dict(random_state=random_seed)) hyperparams_.update(dict(random_state=random_seed))
clf.set_params(**hyperparams_) clf.set_params(**hyperparams_)
n_samples = X.shape[0] n_samples = X.shape[0]
# bootstrap # initialize random boxes
random.seed(random_seed)
random_box = check_random_state(random_seed) random_box = check_random_state(random_seed)
# bootstrap
indices = random_box.randint(0, n_samples, boot_samples) indices = random_box.randint(0, n_samples, boot_samples)
# update weights with the chosen samples # update weights with the chosen samples
weights_update = np.bincount(indices, minlength=n_samples) weights_update = np.bincount(indices, minlength=n_samples)
@ -249,6 +260,18 @@ class Odte(BaseEnsemble, ClassifierMixin):
result[i, predictions[i]] += 1 result[i, predictions[i]] += 1
return result / self.n_estimators return result / self.n_estimators
def nodes_leaves(self) -> Tuple[float, float]: def get_nodes(self) -> int:
check_is_fitted(self, "estimators_") check_is_fitted(self, "estimators_")
return self.nodes_, self.leaves_ return self.nodes_
def get_leaves(self) -> int:
check_is_fitted(self, "estimators_")
return self.leaves_
def get_depth(self) -> int:
check_is_fitted(self, "estimators_")
return self.depth_
def nodes_leaves(self) -> Tuple[int, int]:
check_is_fitted(self, "estimators_")
return (self.get_nodes(), self.get_leaves())

View File

@ -1,3 +1,4 @@
from ._version import __version__
from .Odte import Odte from .Odte import Odte
__author__ = "Ricardo Montañana Gómez" __author__ = "Ricardo Montañana Gómez"
@ -5,4 +6,4 @@ __copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"
__license__ = "MIT License" __license__ = "MIT License"
__author_email__ = "ricardo.montanana@alu.uclm.es" __author_email__ = "ricardo.montanana@alu.uclm.es"
__all__ = ["Odte"] __all__ = ["__version__", "Odte"]

View File

@ -1 +1 @@
__version__ = "0.3.2" __version__ = "1.0.0-1"

View File

@ -1,7 +1,6 @@
# type: ignore # type: ignore
import unittest import unittest
import os import os
import random
import warnings import warnings
import json import json
from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.exceptions import ConvergenceWarning, NotFittedError
@ -46,7 +45,10 @@ class Odte_test(unittest.TestCase):
) )
for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]: for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]:
tclf = Odte( tclf = Odte(
random_state=self._random_state, max_features=max_features random_state=self._random_state,
max_features=max_features,
n_jobs=1,
n_estimators=100,
) )
tclf.fit(X, y) tclf.fit(X, y)
computed = tclf._get_random_subspace(X, y, tclf.max_features_) computed = tclf._get_random_subspace(X, y, tclf.max_features_)
@ -75,15 +77,15 @@ class Odte_test(unittest.TestCase):
X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2] X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
expected = [0, 1, 1, 2] expected = [0, 1, 1, 2]
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=10, n_estimators=10,
n_jobs=-1, n_jobs=-1,
) )
tclf.set_params( tclf.set_params(
**dict( **dict(
base_estimator__kernel="rbf", estimator__kernel="rbf",
base_estimator__random_state=self._random_state, estimator__random_state=self._random_state,
) )
) )
computed = tclf.fit(X, y).predict(X) computed = tclf.fit(X, y).predict(X)
@ -95,14 +97,15 @@ class Odte_test(unittest.TestCase):
X, y = load_dataset(self._random_state) X, y = load_dataset(self._random_state)
expected = y expected = y
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
max_features=1.0, max_features=1.0,
max_samples=0.1, max_samples=0.1,
n_estimators=100,
) )
tclf.set_params( tclf.set_params(
**dict( **dict(
base_estimator__kernel="linear", estimator__kernel="linear",
) )
) )
computed = tclf.fit(X, y).predict(X) computed = tclf.fit(X, y).predict(X)
@ -135,7 +138,6 @@ class Odte_test(unittest.TestCase):
0.97, # iwss None 0.97, # iwss None
0.97, # cfs None 0.97, # cfs None
] ]
random.seed(self._random_state)
for max_features in ["auto", None]: for max_features in ["auto", None]:
for splitter in [ for splitter in [
"best", "best",
@ -146,21 +148,22 @@ class Odte_test(unittest.TestCase):
"cfs", "cfs",
]: ]:
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
n_jobs=1,
) )
tclf.set_params( tclf.set_params(
**dict( **dict(
base_estimator__max_features=max_features, estimator__max_features=max_features,
base_estimator__splitter=splitter, estimator__splitter=splitter,
base_estimator__random_state=self._random_state, estimator__random_state=self._random_state,
) )
) )
expected = results.pop(0) expected = results.pop(0)
computed = tclf.fit(X, y).score(X, y) computed = tclf.fit(X, y).score(X, y)
# print(computed, splitter, max_features) # print(computed, splitter, max_features)
self.assertAlmostEqual(expected, computed) self.assertAlmostEqual(expected, computed, msg=splitter)
def test_generate_subspaces(self): def test_generate_subspaces(self):
features = 250 features = 250
@ -177,26 +180,32 @@ class Odte_test(unittest.TestCase):
warnings.filterwarnings("ignore", category=RuntimeWarning) warnings.filterwarnings("ignore", category=RuntimeWarning)
from sklearn.utils.estimator_checks import check_estimator from sklearn.utils.estimator_checks import check_estimator
check_estimator(Odte()) check_estimator(Odte(n_estimators=10))
def test_nodes_leaves_not_fitted(self): def test_nodes_leaves_not_fitted(self):
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
) )
with self.assertRaises(NotFittedError): with self.assertRaises(NotFittedError):
tclf.nodes_leaves() tclf.nodes_leaves()
with self.assertRaises(NotFittedError):
tclf.get_nodes()
with self.assertRaises(NotFittedError):
tclf.get_leaves()
with self.assertRaises(NotFittedError):
tclf.get_depth()
def test_nodes_leaves_depth(self): def test_nodes_leaves_depth(self):
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=5, n_estimators=5,
n_jobs=1, n_jobs=1,
) )
tclf_p = Odte( tclf_p = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=5, n_estimators=5,
n_jobs=-1, n_jobs=-1,
@ -205,28 +214,37 @@ class Odte_test(unittest.TestCase):
tclf.fit(X, y) tclf.fit(X, y)
tclf_p.fit(X, y) tclf_p.fit(X, y)
for clf in [tclf, tclf_p]: for clf in [tclf, tclf_p]:
self.assertAlmostEqual(5.8, clf.depth_) self.assertEqual(29, clf.depth_)
self.assertAlmostEqual(9.4, clf.leaves_) self.assertEqual(29, clf.get_depth())
self.assertAlmostEqual(17.8, clf.nodes_) self.assertEqual(47, clf.leaves_)
self.assertEqual(47, clf.get_leaves())
self.assertEqual(89, clf.nodes_)
self.assertEqual(89, clf.get_nodes())
nodes, leaves = clf.nodes_leaves() nodes, leaves = clf.nodes_leaves()
self.assertAlmostEqual(9.4, leaves) self.assertEqual(47, leaves)
self.assertAlmostEqual(17.8, nodes) self.assertEqual(47, clf.get_leaves())
self.assertEqual(89, nodes)
self.assertEqual(89, clf.get_nodes())
def test_nodes_leaves_SVC(self): def test_nodes_leaves_SVC(self):
tclf = Odte( tclf = Odte(
base_estimator=SVC(), estimator=SVC(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
) )
X, y = load_dataset(self._random_state, n_features=16, n_samples=500) X, y = load_dataset(self._random_state, n_features=16, n_samples=500)
tclf.fit(X, y) tclf.fit(X, y)
self.assertAlmostEqual(0.0, tclf.leaves_) self.assertAlmostEqual(0.0, tclf.leaves_)
self.assertAlmostEqual(0.0, tclf.get_leaves())
self.assertAlmostEqual(0.0, tclf.nodes_) self.assertAlmostEqual(0.0, tclf.nodes_)
self.assertAlmostEqual(0.0, tclf.get_nodes())
nodes, leaves = tclf.nodes_leaves() nodes, leaves = tclf.nodes_leaves()
self.assertAlmostEqual(0.0, leaves) self.assertAlmostEqual(0.0, leaves)
self.assertAlmostEqual(0.0, tclf.get_leaves())
self.assertAlmostEqual(0.0, nodes) self.assertAlmostEqual(0.0, nodes)
self.assertAlmostEqual(0.0, tclf.get_nodes())
def test_base_estimator_hyperparams(self): def test_estimator_hyperparams(self):
data = [ data = [
(Stree(), {"max_features": 7, "max_depth": 2}), (Stree(), {"max_features": 7, "max_depth": 2}),
(SVC(), {"kernel": "linear", "cache_size": 100}), (SVC(), {"kernel": "linear", "cache_size": 100}),
@ -234,7 +252,7 @@ class Odte_test(unittest.TestCase):
for clf, hyperparams in data: for clf, hyperparams in data:
hyperparams_ = json.dumps(hyperparams) hyperparams_ = json.dumps(hyperparams)
tclf = Odte( tclf = Odte(
base_estimator=clf, estimator=clf,
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
be_hyperparams=hyperparams_, be_hyperparams=hyperparams_,

View File

@ -1,4 +1,3 @@
# type: ignore
from .Odte_tests import Odte_test from .Odte_tests import Odte_test
__all__ = ["Odte_test"] __all__ = ["Odte_test"]

View File

@ -1,5 +1,65 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "Odte"
description = "Oblique decision tree Ensemble."
readme = "README.md"
license = { file = "LICENSE" }
authors = [
{ name = "Ricardo Montañana", email = "ricardo.montanana@alu.uclm.es" },
]
dynamic = ['version']
dependencies = ["stree>=1.4"]
requires-python = ">=3.11"
keywords = [
"scikit-learn",
"oblique-classifier",
"oblique-decision-tree",
"decision-tree",
"ensemble",
"svm",
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"Topic :: Software Development",
"Topic :: Scientific/Engineering",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
[project.optional-dependencies]
dev = ["black", "flake8", "coverage", "mypy", "pandas", "hatch", "pip-audit"]
doc = ["sphinx", "myst-parser", "sphinx_rtd_theme", "sphinx-autodoc-typehints"]
[project.urls]
Home = "https://github.com/doctorado-ml/odte"
Docs = "https://odte.readthedocs.io"
[tool.hatch.version]
path = "odte/_version.py"
[tool.mypy]
exclude = ['tests']
[tool.coverage.run]
branch = true
source = ["odte"]
command_line = "-m unittest discover -s odte.tests"
[tool.coverage.report]
show_missing = true
fail_under = 100
[tool.black] [tool.black]
line-length = 79 line-length = 79
target_version = ['py311']
include = '\.pyi?$' include = '\.pyi?$'
exclude = ''' exclude = '''
/( /(

View File

@ -1 +1,2 @@
stree>1.2.2 scikit-learn==1.5.2
stree>=1.4

View File

@ -1,48 +0,0 @@
import setuptools
import os
def readme():
with open("README.md") as f:
return f.read()
def get_data(field):
item = ""
file_name = "_version.py" if field == "version" else "__init__.py"
with open(os.path.join("odte", file_name)) as f:
for line in f.readlines():
if line.startswith(f"__{field}__"):
delim = '"' if '"' in line else "'"
item = line.split(delim)[1]
break
else:
raise RuntimeError(f"Unable to find {field} string.")
return item
setuptools.setup(
name="Odte",
version=get_data("version"),
license=get_data("license"),
description="Oblique decision tree Ensemble",
long_description=readme(),
long_description_content_type="text/markdown",
packages=setuptools.find_packages(),
url="https://github.com/doctorado-ml/odte",
author=get_data("author"),
author_email=get_data("author_email"),
keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
tree ensemble svm svc",
classifiers=[
"Development Status :: 4 - Beta",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.8",
"Natural Language :: English",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Intended Audience :: Science/Research",
],
install_requires=["stree"],
test_suite="odte.tests",
zip_safe=False,
)