mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 00:15:52 +00:00
109 lines
3.0 KiB
Python
109 lines
3.0 KiB
Python
from sklearn.datasets import load_iris
|
|
from fimdlp.mdlp import FImdlp
|
|
from fimdlp.cppfimdlp import CFImdlp
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
import numpy as np
|
|
import time
|
|
from math import log2
|
|
|
|
from scipy.io import arff
|
|
import pandas as pd
|
|
|
|
# class_name = "speaker"
|
|
# file_name = "kdd_JapaneseVowels.arff"
|
|
class_name = "class"
|
|
# file_name = "mfeat-factors.arff"
|
|
file_name = "letter.arff"
|
|
data = arff.loadarff(file_name)
|
|
df = pd.DataFrame(data[0])
|
|
df.dropna(axis=0, how="any", inplace=True)
|
|
dataset = df
|
|
X = df.drop(class_name, axis=1)
|
|
features = X.columns
|
|
class_name = class_name
|
|
y, _ = pd.factorize(df[class_name])
|
|
X = X.to_numpy()
|
|
|
|
# data = load_iris()
|
|
# X = data.data
|
|
# y = data.target
|
|
# features = data.feature_names
|
|
|
|
|
|
test = FImdlp()
|
|
now = time.time()
|
|
# test.fit(X, y, features=[i for i in (range(3, 14))])
|
|
test.fit(X, y)
|
|
fit_time = time.time()
|
|
print("Fitting: ", fit_time - now)
|
|
now = time.time()
|
|
Xt = test.transform(X)
|
|
print("Transforming: ", time.time() - now)
|
|
print(test.get_cut_points())
|
|
|
|
clf = RandomForestClassifier(random_state=0)
|
|
print(clf.fit(Xt, y).score(Xt, y))
|
|
print(Xt)
|
|
# for proposal in [True, False]:
|
|
# X = data.data
|
|
# y = data.target
|
|
# print("*** Proposal: ", proposal)
|
|
# test = CFImdlp(debug=True, proposal=proposal)
|
|
# test.fit(X[:, 0], y)
|
|
# result = test.get_cut_points()
|
|
# for item in result:
|
|
# print(
|
|
# f"Class={item['classNumber']} - ({item['start']:3d}, "
|
|
# f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
|
|
# f"{item['toValue']:3.1f}]"
|
|
# )
|
|
# print(test.get_discretized_values())
|
|
# print("+" * 40)
|
|
# X = np.array(
|
|
# [
|
|
# [5.1, 3.5, 1.4, 0.2],
|
|
# [5.2, 3.0, 1.4, 0.2],
|
|
# [5.3, 3.2, 1.3, 0.2],
|
|
# [5.4, 3.1, 1.5, 0.2],
|
|
# ]
|
|
# )
|
|
# y = np.array([0, 0, 0, 1])
|
|
# print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
|
# result = test.get_cut_points()
|
|
# for item in result:
|
|
# print(
|
|
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
|
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
|
# )
|
|
# print("*" * 40)
|
|
# # print(Xs, ys)
|
|
# # print("**********************")
|
|
# # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
|
|
# # print(ys)
|
|
# # for start, end in test:
|
|
# # print("Testing ", start, end, ys[:end], ys[end:])
|
|
# # print("Information gain: ", information_gain(ys, ys[:end], ys[end:]))
|
|
# # print(test.transform(X))
|
|
# # print(X)
|
|
# # print(indices)
|
|
# # print(np.array(X)[indices])
|
|
|
|
|
|
# # # k = test.cut_points(X[:, 0], y)
|
|
# # # print(k)
|
|
# # # k = test.cut_points_ant(X[:, 0], y)
|
|
# # # print(k)
|
|
# # # test.debug_points(X[:, 0], y)
|
|
# # X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
|
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
|
# # indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
|
# # clf = CFImdlp(debug=True, proposal=False)
|
|
# # clf.fit(X, y)
|
|
# # print(clf.get_cut_points())
|
|
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
|
# # # To check
|
|
# # indices2 = np.argsort(X)
|
|
# # Xs = np.array(X)[indices2]
|
|
# # ys = np.array(y)[indices2]
|
|
# kdd_JapaneseVowels
|