mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Change the ARFF library used in sample.py
This commit is contained in:
@@ -5,6 +5,7 @@ from scipy.io import arff
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
from fimdlp.mdlp import FImdlp
|
from fimdlp.mdlp import FImdlp
|
||||||
|
from fimdlp.cppfimdlp import CArffFiles
|
||||||
|
|
||||||
datasets = {
|
datasets = {
|
||||||
"mfeat-factors": True,
|
"mfeat-factors": True,
|
||||||
@@ -29,13 +30,15 @@ relative = "" if os.path.isdir("src") else ".."
|
|||||||
file_name = os.path.join(
|
file_name = os.path.join(
|
||||||
relative, "src", "cppmdlp", "tests", "datasets", args.dataset
|
relative, "src", "cppmdlp", "tests", "datasets", args.dataset
|
||||||
)
|
)
|
||||||
data = arff.loadarff(file_name + ".arff")
|
arff = CArffFiles()
|
||||||
df = pd.DataFrame(data[0])
|
arff.load(bytes(f"{file_name}.arff", "utf-8"))
|
||||||
class_column = -1 if datasets[args.dataset] else 0
|
X = arff.get_X()
|
||||||
class_name = df.columns.to_list()[class_column]
|
y = arff.get_y()
|
||||||
X = df.drop(class_name, axis=1)
|
attributes = arff.get_attributes()
|
||||||
y, _ = pd.factorize(df[class_name])
|
attributes = [x[0].decode() for x in attributes]
|
||||||
X = X.to_numpy()
|
df = pd.DataFrame(X, columns=attributes)
|
||||||
|
class_name = arff.get_class_name().decode()
|
||||||
|
df[class_name] = y
|
||||||
test = FImdlp(
|
test = FImdlp(
|
||||||
min_length=args.min_length,
|
min_length=args.min_length,
|
||||||
max_depth=args.max_depth,
|
max_depth=args.max_depth,
|
||||||
@@ -48,7 +51,13 @@ print(f"Fitting ....: {fit_time - now:7.5f} seconds")
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
Xt = test.transform(X)
|
Xt = test.transform(X)
|
||||||
print(f"Transforming: {time.time() - now:7.5f} seconds")
|
print(f"Transforming: {time.time() - now:7.5f} seconds")
|
||||||
print(test.get_cut_points())
|
cut_points = test.get_cut_points()
|
||||||
|
for i, cuts in enumerate(cut_points):
|
||||||
|
print(f"Cut points for feature {attributes[i]}: {cuts}")
|
||||||
|
print(f"Min: {min(X[:, i]):6.4f} Max: {max(X[:, i]):6.4f}")
|
||||||
|
num_cuts = sum([len(x) for x in cut_points])
|
||||||
|
print(f"Total cut points ...: {num_cuts}")
|
||||||
|
print(f"Total feature states: {num_cuts + len(attributes)}")
|
||||||
clf = RandomForestClassifier(random_state=0)
|
clf = RandomForestClassifier(random_state=0)
|
||||||
print(
|
print(
|
||||||
"Random Forest score with discretized data: ", clf.fit(Xt, y).score(Xt, y)
|
"Random Forest score with discretized data: ", clf.fit(Xt, y).score(Xt, y)
|
||||||
|
Reference in New Issue
Block a user