# fimdlp/sample.py

from sklearn.datasets import load_iris
from fimdlp.mdlp import FImdlp
from fimdlp.cppfimdlp import CFImdlp
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import time
from math import log2

from scipy.io import arff
import pandas as pd

# Dataset selection: `file_name` is the ARFF file to read and `class_name`
# is the name of its label column.  Alternatives kept for quick switching:
# class_name = "speaker"
# file_name = "kdd_JapaneseVowels.arff"
# file_name = "mfeat-factors.arff"
file_name = "letter.arff"
class_name = "class"

# Only element 0 of loadarff's result (the records) is used below.
data = arff.loadarff(file_name)
# Build the frame and discard every row with at least one missing value.
df = pd.DataFrame(data[0]).dropna(axis=0, how="any")
dataset = df

# Encode the label column as integer codes; the unique labels are discarded.
y, _ = pd.factorize(df[class_name])

# Every column except the label is a feature; keep the column names around
# before converting the feature matrix to a plain NumPy array.
X = df.drop(columns=class_name)
features = X.columns
X = X.to_numpy()

# data = load_iris()
# X = data.data
# y = data.target
# features = data.feature_names


# Fit the discretizer on the full dataset and time each stage.
# time.perf_counter() is used for the measurements because it is a
# monotonic, high-resolution clock intended for timing intervals, whereas
# time.time() follows the wall clock and can jump if the system time changes.
test = FImdlp()
now = time.perf_counter()
# test.fit(X, y, features=[i for i in (range(3, 14))])
test.fit(X, y)
fit_time = time.perf_counter()
print("Fitting: ", fit_time - now)

# Restart the timer so that only the transform call is measured.
now = time.perf_counter()
Xt = test.transform(X)
print("Transforming: ", time.perf_counter() - now)
print(test.get_cut_points())

# Train a random forest on the discretized features.
# NOTE: the score is computed on the same data used for fitting, so it is a
# training-set score rather than an estimate of generalization.
clf = RandomForestClassifier(random_state=0)
print(clf.fit(Xt, y).score(Xt, y))
print(Xt)
# for proposal in [True, False]:
#     X = data.data
#     y = data.target
#     print("*** Proposal: ", proposal)
#     test = CFImdlp(debug=True, proposal=proposal)
#     test.fit(X[:, 0], y)
#     result = test.get_cut_points()
#     for item in result:
#         print(
#             f"Class={item['classNumber']} - ({item['start']:3d}, "
#             f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
#             f"{item['toValue']:3.1f}]"
#         )
#     print(test.get_discretized_values())
#     print("+" * 40)
#     X = np.array(
#         [
#             [5.1, 3.5, 1.4, 0.2],
#             [5.2, 3.0, 1.4, 0.2],
#             [5.3, 3.2, 1.3, 0.2],
#             [5.4, 3.1, 1.5, 0.2],
#         ]
#     )
#     y = np.array([0, 0, 0, 1])
#     print(test.fit(X[:, 0], y).transform(X[:, 0]))
#     result = test.get_cut_points()
#     for item in result:
#         print(
#             f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
#             f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
#         )
#     print("*" * 40)
# # print(Xs, ys)
# # print("**********************")
# # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
# # print(ys)
# # for start, end in test:
# #     print("Testing ", start, end, ys[:end], ys[end:])
# #     print("Information gain: ", information_gain(ys, ys[:end], ys[end:]))
# # print(test.transform(X))
# # print(X)
# # print(indices)
# # print(np.array(X)[indices])


# # # k = test.cut_points(X[:, 0], y)
# # # print(k)
# # # k = test.cut_points_ant(X[:, 0], y)
# # # print(k)
# # # test.debug_points(X[:, 0], y)
# # X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# # indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
# # clf = CFImdlp(debug=True, proposal=False)
# # clf.fit(X, y)
# # print(clf.get_cut_points())
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# # # To check
# # indices2 = np.argsort(X)
# # Xs = np.array(X)[indices2]
# # ys = np.array(y)[indices2]
# kdd_JapaneseVowels