mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-15 23:46:03 +00:00
77 lines
3.1 KiB
Matlab
Executable File
77 lines
3.1 KiB
Matlab
Executable File
% adult
%
% Configuration for loading the "adult" problem: input/class counts, the
% data files with their pattern counts, the storage arrays, and the tables
% of admissible values for every discrete (categorical) input.
printf('lendo problema adult...\n');

% Problem dimensions and the files to read (one pattern count per file).
n_entradas = 14;
n_clases = 2;
n_fich = 2;
fich{1} = 'adult.data'; n_patrons(1) = 32561;
fich{2} = 'adult.test'; n_patrons(2) = 16281;

% Both files share one array, so the pattern dimension is sized for the
% largest file; unused trailing rows of the smaller file stay at zero.
n_max = max(n_patrons);
x = zeros(n_fich, n_max, n_entradas);   % inputs:  file x pattern x input
cl = zeros(n_fich, n_max);              % classes: file x pattern

% discreta(j) == 1 marks input j as categorical (read as a string token);
% 0 marks it as numeric.
discreta = [0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1];

% Admissible values of each categorical input. The position of a value in
% its list is what gets turned into a number when the files are read.
workclass = {'Private', 'Self-emp-not-inc', 'Self-emp-inc', 'Federal-gov', 'Local-gov', 'State-gov', 'Without-pay', 'Never-worked'};
education = {'Bachelors', 'Some-college', '11th', 'HS-grad', 'Prof-school', 'Assoc-acdm', 'Assoc-voc', '9th', '7th-8th', '12th', 'Masters', '1st-4th', '10th', 'Doctorate', '5th-6th', 'Preschool'};
marital = {'Married-civ-spouse', 'Divorced', 'Never-married', 'Separated', 'Widowed', 'Married-spouse-absent', 'Married-AF-spouse'};
occupation = {'Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners', 'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv', 'Protective-serv', 'Armed-Forces'};
relationship = {'Wife', 'Own-child', 'Husband', 'Not-in-family', 'Other-relative', 'Unmarried'};
race = {'White', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other', 'Black'};
sex = {'Male', 'Female'};
country = {'United-States', 'Cambodia', 'England', 'Puerto-Rico', 'Canada', 'Germany', 'Outlying-US(Guam-USVI-etc)', 'India', 'Japan', 'Greece', 'South', 'China', 'Cuba', 'Iran', 'Honduras', 'Philippines', 'Italy', 'Poland', 'Jamaica', 'Vietnam', 'Mexico', 'Portugal', 'Ireland', 'France', 'Dominican-Republic', 'Laos', 'Ecuador', 'Taiwan', 'Haiti', 'Columbia', 'Hungary', 'Guatemala', 'Nicaragua', 'Scotland', 'Thailand', 'Yugoslavia', 'El-Salvador', 'Trinadad&Tobago', 'Peru', 'Hong', 'Holand-Netherlands'};

% Number of admissible values per categorical input, derived from the
% lists themselves so the counts cannot drift out of sync with them
% (values: 8, 16, 7, 14, 6, 5, 2 and 41).
n_workclass = numel(workclass);
n_education = numel(education);
n_marital = numel(marital);
n_occupation = numel(occupation);
n_relationship = numel(relationship);
n_race = numel(race);
n_sex = numel(sex);
n_country = numel(country);

% Read every pattern of every data file into x (inputs) and cl (classes).
%
% Each pattern is n_entradas tokens followed by a class token; one extra
% character (the separator) is consumed after every token.
%   - numeric inputs are stored as read;
%   - categorical inputs are mapped linearly from their 1-based position k
%     in the value list to [-1, 1] (k = 1 -> -1, k = n -> +1);
%   - a missing categorical input ('?') is stored as 0;
%   - the class is 0 for '<=50K' and 1 for '>50K'; anything else is an error.

% Per-input lookup of the value list and its size, built once instead of
% re-running an if/elseif chain for every value read.
tablas = cell(1, n_entradas);
n_valores = zeros(1, n_entradas);
tablas{2}  = workclass;    n_valores(2)  = n_workclass;
tablas{4}  = education;    n_valores(4)  = n_education;
tablas{6}  = marital;      n_valores(6)  = n_marital;
tablas{7}  = occupation;   n_valores(7)  = n_occupation;
tablas{8}  = relationship; n_valores(8)  = n_relationship;
tablas{9}  = race;         n_valores(9)  = n_race;
tablas{10} = sex;          n_valores(10) = n_sex;
tablas{14} = country;      n_valores(14) = n_country;

for i_fich = 1:n_fich
  f = fopen(fich{i_fich}, 'r');
  if -1 == f
    error('erro en fopen abrindo %s\n', fich{i_fich});
  end

  % unwind_protect guarantees the file is closed even when a read fails or
  % an unknown class aborts the script.
  unwind_protect
    for i = 1:n_patrons(i_fich)
      % Progress indicator on stderr, rewritten in place via '\r'.
      fprintf(2, '%5.1f%%\r', 100*i/n_patrons(i_fich));

      for j = 1:n_entradas
        if discreta(j) == 1
          s = fscanf(f, '%s', 1); fscanf(f, '%c', 1);
          if strcmp(s, '?')  % input missing in this pattern
            x(i_fich,i,j) = 0;
          else
            n = n_valores(j); p = tablas{j};
            % Linear map of positions 1..n onto [-1, 1].
            a = 2/(n-1); b = (1+n)/(1-n);
            k = find(strcmp(s, p(1:n)), 1);
            % A token that is not in the list leaves the entry at its
            % initial value (x is zero-initialised), as before.
            if ~isempty(k)
              x(i_fich,i,j) = a*k + b;
            end
          end
        else
          x(i_fich,i,j) = fscanf(f, '%g', 1); fscanf(f, '%c', 1);
        end
      end

      % The class token closes the pattern.
      s = fscanf(f, '%s', 1); fscanf(f, '%c', 1);
      if strcmp(s, '<=50K')
        cl(i_fich,i) = 0;
      elseif strcmp(s, '>50K')
        cl(i_fich,i) = 1;
      else
        error('clase %s descoñecida\n', s)
      end
    end
  unwind_protect_cleanup
    fclose(f);
  end_unwind_protect
end