mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-19 17:36:01 +00:00
Commit Inicial
This commit is contained in:
2
data/tanveer/spambase/conxuntos.dat
Executable file
2
data/tanveer/spambase/conxuntos.dat
Executable file
File diff suppressed because one or more lines are too long
8
data/tanveer/spambase/conxuntos_kfold.dat
Executable file
8
data/tanveer/spambase/conxuntos_kfold.dat
Executable file
File diff suppressed because one or more lines are too long
BIN
data/tanveer/spambase/descripcion_spambase.pdf
Executable file
BIN
data/tanveer/spambase/descripcion_spambase.pdf
Executable file
Binary file not shown.
23
data/tanveer/spambase/le_datos.m
Executable file
23
data/tanveer/spambase/le_datos.m
Executable file
@@ -0,0 +1,23 @@
|
||||
% le_datos: read the spambase patterns from disk into memory.
%
% Inputs (taken from the calling workspace):
%   problema - only used in the banner message below; it is not assigned
%              in this script, so the caller is expected to define it.
%
% Outputs (left in the workspace):
%   x  - n_fich x n_max x n_entradas array; x(k,i,:) holds the input values
%        of pattern i of file k.
%   cl - n_fich x n_max array; cl(k,i) holds the class value read right
%        after the inputs of pattern i of file k.
%
% Each pattern is read as n_entradas numbers ('%g') followed by one integer
% ('%i'). NOTE(review): this assumes the values in the data file are
% whitespace-separated -- confirm against the actual file.
printf('lendo problema %s ...\n', problema);

n_entradas= 57; n_clases= 2; n_fich= 1; fich{1}= 'spambase.data'; n_patrons(1)= 4601;

% Pre-allocate for the largest file so every file fits in the same array.
n_max= max(n_patrons);
x = zeros(n_fich, n_max, n_entradas); cl= zeros(n_fich, n_max);

% n_iter counts the patterns processed so far across all files (progress only).
n_patrons_total = sum(n_patrons); n_iter=0;

for i_fich=1:n_fich
  f=fopen(fich{i_fich}, 'r');
  if -1==f
    error('erro en fopen abrindo %s\n', fich{i_fich});
  end
  % Guarantee the file is closed even when a read below raises an error.
  unwind_protect
    for i=1:n_patrons(i_fich)
      % Progress goes to stderr; the value shown is the count *before* this
      % pattern, exactly as the original post-increment expression printed.
      fprintf(2,'%5.1f%%\r', 100*n_iter/n_patrons_total);
      n_iter = n_iter + 1;

      % Read all inputs of the pattern in one call instead of one by one.
      [entradas, n_lidos] = fscanf(f,'%g',n_entradas);
      if n_lidos ~= n_entradas
        error('erro lendo as entradas do patron %d de %s\n', i, fich{i_fich});
      end
      x(i_fich,i,:) = reshape(entradas, 1, 1, n_entradas);

      % Read the class value that follows the inputs.
      [clase, n_lidos] = fscanf(f,'%i',1);
      if n_lidos ~= 1
        error('erro lendo a clase do patron %d de %s\n', i, fich{i_fich});
      end
      cl(i_fich,i) = clase;
    end
  unwind_protect_cleanup
    fclose(f);
  end_unwind_protect
end
|
4661
data/tanveer/spambase/spambase.arff
Executable file
4661
data/tanveer/spambase/spambase.arff
Executable file
File diff suppressed because it is too large
Load Diff
5
data/tanveer/spambase/spambase.cost
Executable file
5
data/tanveer/spambase/spambase.cost
Executable file
@@ -0,0 +1,5 @@
|
||||
% Rows Columns
|
||||
2 2
|
||||
% Matrix elements
|
||||
0.0 1.0
|
||||
1.0 0.0
|
4601
data/tanveer/spambase/spambase.data
Executable file
4601
data/tanveer/spambase/spambase.data
Executable file
File diff suppressed because it is too large
Load Diff
90
data/tanveer/spambase/spambase.names
Executable file
90
data/tanveer/spambase/spambase.names
Executable file
@@ -0,0 +1,90 @@
|
||||
| SPAM E-MAIL DATABASE ATTRIBUTES (in .names format)
|
||||
|
|
||||
| 48 continuous real [0,100] attributes of type word_freq_WORD
|
||||
| = percentage of words in the e-mail that match WORD,
|
||||
| i.e. 100 * (number of times the WORD appears in the e-mail) /
|
||||
| total number of words in e-mail. A "word" in this case is any
|
||||
| string of alphanumeric characters bounded by non-alphanumeric
|
||||
| characters or end-of-string.
|
||||
|
|
||||
| 6 continuous real [0,100] attributes of type char_freq_CHAR
|
||||
| = percentage of characters in the e-mail that match CHAR,
|
||||
| i.e. 100 * (number of CHAR occurrences) / total characters in e-mail
|
||||
|
|
||||
| 1 continuous real [1,...] attribute of type capital_run_length_average
|
||||
| = average length of uninterrupted sequences of capital letters
|
||||
|
|
||||
| 1 continuous integer [1,...] attribute of type capital_run_length_longest
|
||||
| = length of longest uninterrupted sequence of capital letters
|
||||
|
|
||||
| 1 continuous integer [1,...] attribute of type capital_run_length_total
|
||||
| = sum of length of uninterrupted sequences of capital letters
|
||||
| = total number of capital letters in the e-mail
|
||||
|
|
||||
| 1 nominal {0,1} class attribute of type spam
|
||||
| = denotes whether the e-mail was considered spam (1) or not (0),
|
||||
| i.e. unsolicited commercial e-mail.
|
||||
|
|
||||
| For more information, see file 'spambase.DOCUMENTATION' at the
|
||||
| UCI Machine Learning Repository: http://www.ics.uci.edu/~mlearn/MLRepository.html
|
||||
|
||||
|
||||
1, 0. | spam, non-spam classes
|
||||
|
||||
word_freq_make: continuous.
|
||||
word_freq_address: continuous.
|
||||
word_freq_all: continuous.
|
||||
word_freq_3d: continuous.
|
||||
word_freq_our: continuous.
|
||||
word_freq_over: continuous.
|
||||
word_freq_remove: continuous.
|
||||
word_freq_internet: continuous.
|
||||
word_freq_order: continuous.
|
||||
word_freq_mail: continuous.
|
||||
word_freq_receive: continuous.
|
||||
word_freq_will: continuous.
|
||||
word_freq_people: continuous.
|
||||
word_freq_report: continuous.
|
||||
word_freq_addresses: continuous.
|
||||
word_freq_free: continuous.
|
||||
word_freq_business: continuous.
|
||||
word_freq_email: continuous.
|
||||
word_freq_you: continuous.
|
||||
word_freq_credit: continuous.
|
||||
word_freq_your: continuous.
|
||||
word_freq_font: continuous.
|
||||
word_freq_000: continuous.
|
||||
word_freq_money: continuous.
|
||||
word_freq_hp: continuous.
|
||||
word_freq_hpl: continuous.
|
||||
word_freq_george: continuous.
|
||||
word_freq_650: continuous.
|
||||
word_freq_lab: continuous.
|
||||
word_freq_labs: continuous.
|
||||
word_freq_telnet: continuous.
|
||||
word_freq_857: continuous.
|
||||
word_freq_data: continuous.
|
||||
word_freq_415: continuous.
|
||||
word_freq_85: continuous.
|
||||
word_freq_technology: continuous.
|
||||
word_freq_1999: continuous.
|
||||
word_freq_parts: continuous.
|
||||
word_freq_pm: continuous.
|
||||
word_freq_direct: continuous.
|
||||
word_freq_cs: continuous.
|
||||
word_freq_meeting: continuous.
|
||||
word_freq_original: continuous.
|
||||
word_freq_project: continuous.
|
||||
word_freq_re: continuous.
|
||||
word_freq_edu: continuous.
|
||||
word_freq_table: continuous.
|
||||
word_freq_conference: continuous.
|
||||
char_freq_;: continuous.
|
||||
char_freq_(: continuous.
|
||||
char_freq_[: continuous.
|
||||
char_freq_!: continuous.
|
||||
char_freq_$: continuous.
|
||||
char_freq_#: continuous.
|
||||
capital_run_length_average: continuous.
|
||||
capital_run_length_longest: continuous.
|
||||
capital_run_length_total: continuous.
|
8
data/tanveer/spambase/spambase.txt
Executable file
8
data/tanveer/spambase/spambase.txt
Executable file
@@ -0,0 +1,8 @@
|
||||
n_entradas= 57
|
||||
n_clases= 2
|
||||
n_arquivos= 1
|
||||
fich1= spambase_R.dat
|
||||
n_patrons1= 4601
|
||||
n_patrons_entrena= 2301
|
||||
n_patrons_valida= 2300
|
||||
n_conxuntos= 1
|
4602
data/tanveer/spambase/spambase_R.dat
Executable file
4602
data/tanveer/spambase/spambase_R.dat
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user