"""Sample script: fit FImdlp on an ARFF dataset and report timings.

Usage: pass one of the dataset names listed in ``datasets`` as the
positional argument; add ``--alternative`` to construct ``FImdlp`` with
``algorithm=1`` instead of the default ``algorithm=0``.

The script prints the time spent in ``fit`` and ``transform``, the cut
points reported by the fitted object, and the score of a random forest
trained and evaluated on the transformed data.
"""
import argparse
import os
import time

import pandas as pd
from scipy.io import arff
from sklearn.ensemble import RandomForestClassifier

from fimdlp.mdlp import FImdlp

# Dataset name -> position of the class attribute in the ARFF file:
# True means the class is the last column, False means it is the first.
datasets = {
    "mfeat-factors": True,
    "iris": True,
    "letter": True,
    "kdd_JapaneseVowels": False,
}

ap = argparse.ArgumentParser()
# default=0 makes the "flag not given" case explicit instead of mapping
# None to 0 later on.
ap.add_argument(
    "--alternative", dest="proposal", action="store_const", const=1, default=0
)
ap.add_argument("dataset", type=str, choices=datasets.keys())
args = ap.parse_args()

# The datasets live under src/cppmdlp/tests/datasets; if there is no "src"
# directory in the current working directory, look one level up.
relative = "" if os.path.isdir("src") else ".."
file_name = os.path.join(
    relative, "src", "cppmdlp", "tests", "datasets", args.dataset
)
data = arff.loadarff(file_name + ".arff")
df = pd.DataFrame(data[0])

# Split the frame into features (X) and integer-encoded labels (y).
class_column = -1 if datasets[args.dataset] else 0
class_name = df.columns.to_list()[class_column]
X = df.drop(class_name, axis=1)
y, _ = pd.factorize(df[class_name])
X = X.to_numpy()

test = FImdlp(algorithm=args.proposal)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
now = time.perf_counter()
test.fit(X, y)
fit_time = time.perf_counter()
print("Fitting: ", fit_time - now)

now = time.perf_counter()
Xt = test.transform(X)
print("Transforming: ", time.perf_counter() - now)
print(test.get_cut_points())

# NOTE: the score is computed on the same data the forest was trained on,
# so it is not an estimate of generalization performance.
clf = RandomForestClassifier(random_state=0)
print(
    "Random Forest score with discretized data: ", clf.fit(Xt, y).score(Xt, y)
)