Add the number of samples per class to the balance column of the datasets report

This commit is contained in:
2023-05-09 10:25:54 +02:00
parent 5c8b7062cc
commit e3d969c5d7
5 changed files with 14 additions and 13 deletions

View File

@@ -165,7 +165,7 @@ class Datasets:
comp = "" comp = ""
sep = "" sep = ""
for count in counts: for count in counts:
comp += f"{sep}{count/sum(counts)*100:5.2f}%" comp += f"{sep}{count/sum(counts)*100:5.2f}% ({count}) "
sep = "/ " sep = "/ "
attr.balance = comp attr.balance = comp
attr.classes = len(np.unique(y)) attr.classes = len(np.unique(y))

View File

@@ -814,18 +814,20 @@ class ReportDatasets:
def report(self): def report(self):
data_sets = Datasets() data_sets = Datasets()
max_len = max(
[len(data_sets.get_attributes(data).balance) for data in data_sets]
)
color_line = TextColor.LINE1 color_line = TextColor.LINE1
if self.excel:
self.header()
if self.output: if self.output:
print(color_line, end="") print(color_line, end="")
print(self.header_text) print(self.header_text)
print("") print("")
print(f"{'Dataset':30s} Sampl. Feat. Cont Cls Balance") print(f"{'Dataset':30s} Sampl. Feat. Cont Cls Balance")
print("=" * 30 + " ====== ===== ==== === " + "=" * 60) print("=" * 30 + " ====== ===== ==== === " + "=" * max_len)
if self.excel:
self.header()
for dataset in data_sets: for dataset in data_sets:
attributes = data_sets.get_attributes(dataset) attributes = data_sets.get_attributes(dataset)
if self.excel: if self.excel:
self.print_line(attributes) self.print_line(attributes)
color_line = ( color_line = (

View File

@@ -87,7 +87,6 @@ class ReportTest(TestBase):
if self.stree_version in line: if self.stree_version in line:
# replace STree version # replace STree version
line = self.replace_STree_version(line, output_text, index) line = self.replace_STree_version(line, output_text, index)
self.assertEqual(line, output_text[index]) self.assertEqual(line, output_text[index])
@patch("sys.stdout", new_callable=StringIO) @patch("sys.stdout", new_callable=StringIO)

View File

@@ -1,4 +1,4 @@
1;1;"Datasets used in benchmark ver. 0.4.0" 1;1;"Datasets used in benchmark ver. 0.5.0"
2;1;" Default score accuracy" 2;1;" Default score accuracy"
2;2;"Cross validation" 2;2;"Cross validation"
2;6;"5 Folds" 2;6;"5 Folds"
@@ -19,10 +19,10 @@
7;3;"4" 7;3;"4"
7;4;"0" 7;4;"0"
7;5;"3" 7;5;"3"
7;6;" 7.84%/ 46.08%/ 46.08%" 7;6;" 7.84% (49) / 46.08% (288) / 46.08% (288) "
8;1;"balloons" 8;1;"balloons"
8;2;"16" 8;2;"16"
8;3;"4" 8;3;"4"
8;4;"0" 8;4;"0"
8;5;"2" 8;5;"2"
8;6;"56.25%/ 43.75%" 8;6;"56.25% (9) / 43.75% (7) "

View File

@@ -1,6 +1,6 @@
Datasets used in benchmark ver. 0.2.0 Datasets used in benchmark ver. 0.5.0
Dataset Sampl. Feat. Cont Cls Balance Dataset Sampl. Feat. Cont Cls Balance
============================== ====== ===== ==== === ============================================================ ============================== ====== ===== ==== === ==========================================
balance-scale 625 4 0 3 7.84%/ 46.08%/ 46.08% balance-scale 625 4 0 3 7.84% (49) / 46.08% (288) / 46.08% (288)
balloons 16 4 0 2 56.25%/ 43.75% balloons 16 4 0 2 56.25% (9) / 43.75% (7)