mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-18 17:06:02 +00:00
Commit Inicial
This commit is contained in:
BIN
data/tanveer/bank/bank-full.csv
(Stored with Git LFS)
Executable file
BIN
data/tanveer/bank/bank-full.csv
(Stored with Git LFS)
Executable file
Binary file not shown.
|
70
data/tanveer/bank/bank-names.txt
Executable file
70
data/tanveer/bank/bank-names.txt
Executable file
@@ -0,0 +1,70 @@
|
||||
Citation Request:
|
||||
This dataset is publicly available for research. The details are described in [Moro et al., 2011].
|
||||
Please include this citation if you plan to use this database:
|
||||
|
||||
[Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology.
|
||||
In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM'2011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.
|
||||
|
||||
Available at: [pdf] http://hdl.handle.net/1822/14838
|
||||
[bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt
|
||||
|
||||
1. Title: Bank Marketing
|
||||
|
||||
2. Sources
|
||||
Created by: Paulo Cortez (Univ. Minho) and Sérgio Moro (ISCTE-IUL) @ 2012
|
||||
|
||||
3. Past Usage:
|
||||
|
||||
The full dataset was described and analyzed in:
|
||||
|
||||
S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology.
|
||||
In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM'2011, pp. 117-121, Guimarães,
|
||||
Portugal, October, 2011. EUROSIS.
|
||||
|
||||
4. Relevant Information:
|
||||
|
||||
The data is related to direct marketing campaigns of a Portuguese banking institution.
|
||||
The marketing campaigns were based on phone calls. Often, more than one contact to the same client was required,
|
||||
in order to assess if the product (bank term deposit) would be (or not) subscribed.
|
||||
|
||||
There are two datasets:
|
||||
1) bank-full.csv with all examples, ordered by date (from May 2008 to November 2010).
|
||||
2) bank.csv with 10% of the examples (4521), randomly selected from bank-full.csv.
|
||||
The smaller dataset is provided to test more computationally demanding machine learning algorithms (e.g. SVM).
|
||||
|
||||
The classification goal is to predict if the client will subscribe to a term deposit (variable y).
|
||||
|
||||
5. Number of Instances: 45211 for bank-full.csv (4521 for bank.csv)
|
||||
|
||||
6. Number of Attributes: 16 + output attribute.
|
||||
|
||||
7. Attribute information:
|
||||
|
||||
For more information, read [Moro et al., 2011].
|
||||
|
||||
Input variables:
|
||||
# bank client data:
|
||||
1 - age (numeric)
|
||||
2 - job : type of job (categorical: "admin.","unknown","unemployed","management","housemaid","entrepreneur","student",
|
||||
"blue-collar","self-employed","retired","technician","services")
|
||||
3 - marital : marital status (categorical: "married","divorced","single"; note: "divorced" means divorced or widowed)
|
||||
4 - education (categorical: "unknown","secondary","primary","tertiary")
|
||||
5 - default: has credit in default? (binary: "yes","no")
|
||||
6 - balance: average yearly balance, in euros (numeric)
|
||||
7 - housing: has housing loan? (binary: "yes","no")
|
||||
8 - loan: has personal loan? (binary: "yes","no")
|
||||
# related to the last contact of the current campaign:
|
||||
9 - contact: contact communication type (categorical: "unknown","telephone","cellular")
|
||||
10 - day: last contact day of the month (numeric)
|
||||
11 - month: last contact month of year (categorical: "jan", "feb", "mar", ..., "nov", "dec")
|
||||
12 - duration: last contact duration, in seconds (numeric)
|
||||
# other attributes:
|
||||
13 - campaign: number of contacts performed during this campaign and for this client (numeric, includes last contact)
|
||||
14 - pdays: number of days that passed by after the client was last contacted from a previous campaign (numeric, -1 means client was not previously contacted)
|
||||
15 - previous: number of contacts performed before this campaign and for this client (numeric)
|
||||
16 - poutcome: outcome of the previous marketing campaign (categorical: "unknown","other","failure","success")
|
||||
|
||||
Output variable (desired target):
|
||||
17 - y - has the client subscribed to a term deposit? (binary: "yes","no")
|
||||
|
||||
8. Missing Attribute Values: None
|
4540
data/tanveer/bank/bank.arff
Executable file
4540
data/tanveer/bank/bank.arff
Executable file
File diff suppressed because it is too large
Load Diff
5
data/tanveer/bank/bank.cost
Executable file
5
data/tanveer/bank/bank.cost
Executable file
@@ -0,0 +1,5 @@
|
||||
% Rows Columns
|
||||
2 2
|
||||
% Matrix elements
|
||||
0.0 1.0
|
||||
1.0 0.0
|
BIN
data/tanveer/bank/bank.csv
(Stored with Git LFS)
Executable file
BIN
data/tanveer/bank/bank.csv
(Stored with Git LFS)
Executable file
Binary file not shown.
|
8
data/tanveer/bank/bank.txt
Executable file
8
data/tanveer/bank/bank.txt
Executable file
@@ -0,0 +1,8 @@
|
||||
n_entradas= 16
|
||||
n_clases= 2
|
||||
n_arquivos= 1
|
||||
fich1= bank_R.dat
|
||||
n_patrons1= 4521
|
||||
n_patrons_entrena= 2261
|
||||
n_patrons_valida= 2260
|
||||
n_conxuntos= 1
|
BIN
data/tanveer/bank/bank.zip
Executable file
BIN
data/tanveer/bank/bank.zip
Executable file
Binary file not shown.
4522
data/tanveer/bank/bank_R.dat
Executable file
4522
data/tanveer/bank/bank_R.dat
Executable file
File diff suppressed because it is too large
Load Diff
2
data/tanveer/bank/conxuntos.dat
Executable file
2
data/tanveer/bank/conxuntos.dat
Executable file
File diff suppressed because one or more lines are too long
8
data/tanveer/bank/conxuntos_kfold.dat
Executable file
8
data/tanveer/bank/conxuntos_kfold.dat
Executable file
File diff suppressed because one or more lines are too long
71
data/tanveer/bank/le_datos.m
Executable file
71
data/tanveer/bank/le_datos.m
Executable file
@@ -0,0 +1,71 @@
|
||||
% le_datos.m -- reads the "bank" problem from bank.csv into numeric arrays.
%
% This is a script, not a function. `problema` is not defined here; it must
% already exist in the calling workspace (it is only used in the progress
% message). On completion the script leaves, among others:
%   x  : n_fich x n_max x n_entradas array with the input values
%   cl : n_fich x n_max array with the class label (0 for "no", 1 for "yes")
%
% Numeric inputs are stored as read. Each categorical input is mapped linearly
% onto [-1, 1] by its position k in the list of accepted values:
% value = a*k + b with a = 2/(n-1) and b = (1+n)/(1-n), n = list length.
%
% Layout the parser expects: one header token, then per row n_entradas fields
% each followed by a single separator character, with categorical fields and
% the final class field enclosed in double quotes.
printf('lendo problema %s ...\n', problema);

n_entradas= 16; n_clases= 2; n_fich= 1; fich{1}= 'bank.csv'; n_patrons(1)= 4521; %fich{2}= ' '; n_patrons(2)= 0;

n_max= max(n_patrons);
x = zeros(n_fich, n_max, n_entradas); cl= zeros(n_fich, n_max);

n_patrons_total = sum(n_patrons); n_iter=0;

% Accepted values of every categorical input, indexed by column number.
% Columns left empty (1, 6, 10, 12, 13, 14, 15) are numeric.
% Built once here instead of on every field of every row.
vals = cell(1, n_entradas);
vals{2}  = {'admin.','unknown','unemployed','management','housemaid','entrepreneur','student','blue-collar','self-employed', 'retired', 'technician', 'services'};
vals{3}  = {'married','divorced','single'};
vals{4}  = { 'unknown','secondary','primary','tertiary'};
vals{5}  = {'yes','no'};
vals{7}  = {'yes','no'};
vals{8}  = {'yes','no'};
vals{9}  = {'unknown','telephone','cellular'};
vals{11} = {'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'};
vals{16} = {'unknown','other','failure','success'};

for i_fich=1:n_fich
    f=fopen(fich{i_fich}, 'r');
    if -1==f
        error('erro en fopen abrindo %s\n', fich{i_fich});
    end
    fscanf(f,'%s',1);  % read and discard the first (header) row

    for i=1:n_patrons(i_fich)
        % Progress on stderr; the percentage uses the count *before* this row,
        % exactly as the former post-increment expression did.
        fprintf(2,'%5.1f%%\r', 100*n_iter/n_patrons_total);
        n_iter = n_iter + 1;

        for j = 1:n_entradas
            val = vals{j};
            if isempty(val)  % numeric column
                x(i_fich,i,j)=fscanf(f,'%g',1); fscanf(f,'%c',1);  % read and discard the separator character
                continue
            end

            t=''; fscanf(f,'%c',1);  % read and discard the opening double quote
            while 1
                c = fscanf(f, '%c',1);
                if isempty(c)
                    % End of file inside a quoted field: without this guard the
                    % loop would never terminate, since an empty read never
                    % compares equal to the closing quote.
                    fclose(f);
                    error('erro lendo %s: ficheiro truncado\n', fich{i_fich});
                end
                if c=='"'
                    break
                end
                t=[t c];
            end

            n=length(val); a=2/(n-1); b=(1+n)/(1-n);
            k = find(strcmp(t, val), 1);  % position of t in the accepted values
            if isempty(k)
                % x keeps its initial 0 here, which for odd n is also a valid
                % encoded value, so make the mismatch visible.
                warning('valor <%s> descoñecido na entrada %d do patrón %d', t, j, i);
            else
                x(i_fich,i,j)=a*k+b;
            end
            fscanf(f,'%c',1);  % read and discard the separator character
        end

        % class label
        t=''; fscanf(f,'%c',1);  % read and discard the opening double quote of the class
        while 1
            c = fscanf(f, '%c',1);
            if isempty(c)
                fclose(f);
                error('erro lendo %s: ficheiro truncado\n', fich{i_fich});
            end
            if c=='"'
                break
            end
            t=[t c];
        end
        if strcmp(t, 'no')
            cl(i_fich,i) = 0;
        elseif strcmp(t,'yes')
            cl(i_fich,i) = 1;
        else
            fclose(f);  % do not leak the handle on the error path
            error('clase <%s> descoñecida!', t)
        end
    end
    fclose(f);
end
|
Reference in New Issue
Block a user