First commit
This commit is contained in:
350
libsvm-3.36/java/svm_scale.java
Normal file
350
libsvm-3.36/java/svm_scale.java
Normal file
@@ -0,0 +1,350 @@
|
||||
import libsvm.*;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.text.DecimalFormat;
|
||||
|
||||
/**
 * Command-line tool that linearly rescales a sparse data file.
 *
 * Each input line is a target value followed by "index:value" pairs. Every
 * feature is mapped from its observed [min, max] range onto [lower, upper]
 * (and, with -y, the target onto [y_lower, y_upper]); the scaled data is
 * written to standard output. The ranges can be saved to a file (-s) or
 * taken from a previously saved file (-r).
 */
class svm_scale
{
	/** Token separators for a data line; ':' splits each "index:value" pair. */
	private static final String DATA_DELIMITERS = " \t\n\r\f:";

	private String line = null;			// most recent line returned by readline()
	private double lower = -1.0;		// feature scaling lower limit (-l)
	private double upper = 1.0;			// feature scaling upper limit (-u)
	private double y_lower;				// target scaling lower limit (-y)
	private double y_upper;				// target scaling upper limit (-y)
	private boolean y_scaling = false;	// true when targets are scaled too
	private double[] feature_max;		// per-index maximum, indexed 0..max_index
	private double[] feature_min;		// per-index minimum, indexed 0..max_index
	private double y_max = -Double.MAX_VALUE;
	private double y_min = Double.MAX_VALUE;
	private int max_index;				// largest feature index seen
	private long num_nonzeros = 0;		// index:value pairs present in the input
	private long new_num_nonzeros = 0;	// index:value pairs written to the output

	/** Prints the usage message to standard output and exits with status 1. */
	private static void exit_with_help()
	{
		System.out.print(
		 "Usage: svm-scale [options] data_filename\n"
		+"options:\n"
		+"-l lower : x scaling lower limit (default -1)\n"
		+"-u upper : x scaling upper limit (default +1)\n"
		+"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
		+"-s save_filename : save scaling parameters to save_filename\n"
		+"-r restore_filename : restore scaling parameters from restore_filename\n"
		);
		System.exit(1);
	}

	/**
	 * Returns argv[i], or prints the usage message and exits when the command
	 * line ends before the value an option requires.
	 */
	private static String option_value(String[] argv, int i)
	{
		if(i >= argv.length)
		{
			System.err.println("missing option value");
			exit_with_help();
		}
		return argv[i];
	}

	/**
	 * Opens filename for reading; on failure reports it on standard error and
	 * exits with status 1.
	 */
	private static BufferedReader open_or_exit(String filename)
	{
		try
		{
			return new BufferedReader(new FileReader(filename));
		}
		catch(IOException e)
		{
			System.err.println("can't open file " + filename);
			System.exit(1);
			return null;	// not reached; keeps the compiler satisfied
		}
	}

	/** Closes fp and returns a fresh reader positioned at the start of filename. */
	private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
	{
		fp.close();
		return new BufferedReader(new FileReader(filename));
	}

	/**
	 * Prints the target value followed by a space, scaling it first when
	 * y scaling is enabled.
	 */
	private void output_target(double value)
	{
		if(y_scaling)
		{
			// The extremes are mapped exactly so rounding cannot push them
			// outside [y_lower, y_upper].
			if(value == y_min)
				value = y_lower;
			else if(value == y_max)
				value = y_upper;
			else
				value = y_lower + (y_upper-y_lower) *
					(value-y_min) / (y_max-y_min);
		}

		System.out.print(value + " ");
	}

	/**
	 * Scales one feature value and prints it as "index:value ".
	 * Nothing is printed for a feature whose min equals its max, nor when the
	 * scaled value is zero (the output stays sparse).
	 */
	private void output(int index, double value)
	{
		/* skip single-valued attribute */
		if(feature_max[index] == feature_min[index])
			return;

		if(value == feature_min[index])
			value = lower;
		else if(value == feature_max[index])
			value = upper;
		else
			value = lower + (upper-lower) *
				(value-feature_min[index])/
				(feature_max[index]-feature_min[index]);

		if(value != 0)
		{
			System.out.print(index + ":" + value + " ");
			new_num_nonzeros++;
		}
	}

	/** Reads the next line of fp into the line field and returns it (null at end of file). */
	private String readline(BufferedReader fp) throws IOException
	{
		line = fp.readLine();
		return line;
	}

	/**
	 * Returns the largest feature index listed in a saved parameter file,
	 * or 0 when it lists none.
	 */
	private int restore_max_index(String restore_filename) throws IOException
	{
		BufferedReader fp_restore = open_or_exit(restore_filename);
		try
		{
			int largest = 0;

			if(fp_restore.read() == 'y')
			{
				fp_restore.readLine();	// rest of the "y" line
				fp_restore.readLine();	// y_lower y_upper
				fp_restore.readLine();	// y_min y_max
			}
			fp_restore.readLine();		// the "x" line, or what is left of it
			fp_restore.readLine();		// lower upper

			String restore_line;
			while((restore_line = fp_restore.readLine()) != null)
			{
				StringTokenizer st = new StringTokenizer(restore_line);
				if(!st.hasMoreTokens())
					continue;			// tolerate blank lines
				largest = Math.max(largest, Integer.parseInt(st.nextToken()));
			}
			return largest;
		}
		finally
		{
			fp_restore.close();
		}
	}

	/** Pass 1: raises max_index to the largest index in the data and counts its index:value pairs. */
	private void count_indices(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);
			st.nextToken();				// skip the target value
			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				max_index = Math.max(max_index, index);
				st.nextToken();			// skip the feature value
				num_nonzeros++;
			}
		}
	}

	/** Widens the ranges of features from..to (inclusive) so that they contain zero. */
	private void include_zero(int from, int to)
	{
		for(int i = from; i <= to; i++)
		{
			feature_max[i] = Math.max(feature_max[i], 0);
			feature_min[i] = Math.min(feature_min[i], 0);
		}
	}

	/**
	 * Pass 2: records the min/max of the target and of every feature.
	 * An index that is absent from a line counts as the value zero.
	 */
	private void scan_ranges(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			int next_index = 1;
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);

			double target = Double.parseDouble(st.nextToken());
			y_max = Math.max(y_max, target);
			y_min = Math.min(y_min, target);

			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				double value = Double.parseDouble(st.nextToken());

				include_zero(next_index, index - 1);
				feature_max[index] = Math.max(feature_max[index], value);
				feature_min[index] = Math.min(feature_min[index], value);
				next_index = index + 1;
			}

			include_zero(next_index, max_index);
		}
	}

	/**
	 * Pass 2.5 (restore): replaces the scaling limits and the listed feature
	 * ranges with the ones stored in restore_filename. An optional leading
	 * "y" section enables target scaling.
	 */
	private void restore_parameters(String restore_filename) throws IOException
	{
		BufferedReader fp_restore = open_or_exit(restore_filename);
		try
		{
			fp_restore.mark(2);			// so the peeked character can be un-read
			if(fp_restore.read() == 'y')
			{
				fp_restore.readLine();	// pass the '\n' after 'y'
				StringTokenizer st = new StringTokenizer(fp_restore.readLine());
				y_lower = Double.parseDouble(st.nextToken());
				y_upper = Double.parseDouble(st.nextToken());
				st = new StringTokenizer(fp_restore.readLine());
				y_min = Double.parseDouble(st.nextToken());
				y_max = Double.parseDouble(st.nextToken());
				y_scaling = true;
			}
			else
				fp_restore.reset();

			if(fp_restore.read() == 'x')
			{
				fp_restore.readLine();	// pass the '\n' after 'x'
				StringTokenizer st = new StringTokenizer(fp_restore.readLine());
				lower = Double.parseDouble(st.nextToken());
				upper = Double.parseDouble(st.nextToken());

				String restore_line;
				while((restore_line = fp_restore.readLine()) != null)
				{
					StringTokenizer st2 = new StringTokenizer(restore_line);
					if(!st2.hasMoreTokens())
						continue;		// tolerate blank lines
					int idx = Integer.parseInt(st2.nextToken());
					double fmin = Double.parseDouble(st2.nextToken());
					double fmax = Double.parseDouble(st2.nextToken());
					if(idx <= max_index)
					{
						feature_min[idx] = fmin;
						feature_max[idx] = fmax;
					}
				}
			}
		}
		finally
		{
			fp_restore.close();
		}
	}

	/**
	 * Pass 2.5 (save): writes the scaling limits and the range of every
	 * feature whose min differs from its max to save_filename.
	 */
	private void save_parameters(String save_filename) throws IOException
	{
		BufferedWriter fp_save = null;
		try
		{
			fp_save = new BufferedWriter(new FileWriter(save_filename));
		}
		catch(IOException e)
		{
			System.err.println("can't open file " + save_filename);
			System.exit(1);
		}

		try
		{
			// Locale.ROOT keeps '.' as the decimal separator whatever the
			// default locale is; the file is read back with Double.parseDouble,
			// which does not accept ','.
			Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);

			if(y_scaling)
			{
				formatter.format("y\n");
				formatter.format("%.16g %.16g\n", y_lower, y_upper);
				formatter.format("%.16g %.16g\n", y_min, y_max);
			}
			formatter.format("x\n");
			formatter.format("%.16g %.16g\n", lower, upper);
			for(int i = 1; i <= max_index; i++)
			{
				if(feature_min[i] != feature_max[i])
					formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
			}
			fp_save.write(formatter.toString());
		}
		finally
		{
			fp_save.close();
		}
	}

	/** Pass 3: prints every data line with its target and features scaled. */
	private void scale(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			int next_index = 1;
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);

			output_target(Double.parseDouble(st.nextToken()));
			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				double value = Double.parseDouble(st.nextToken());
				// indices missing from the line are scaled as zeros
				for(int i = next_index; i < index; i++)
					output(i, 0);
				output(index, value);
				next_index = index + 1;
			}

			for(int i = next_index; i <= max_index; i++)
				output(i, 0);
			System.out.print("\n");
		}
	}

	/**
	 * Parses the command line, then makes three passes over the data file:
	 * find the largest index, find the value ranges, print the scaled data.
	 * Usage errors and unopenable files terminate the JVM with status 1.
	 *
	 * @param argv command-line arguments: options followed by the data file name
	 * @throws IOException if reading the data or a parameter file fails
	 */
	private void run(String []argv) throws IOException
	{
		int i;
		String save_filename = null;
		String restore_filename = null;

		for(i=0;i<argv.length;i++)
		{
			String option = argv[i];
			if(option.isEmpty() || option.charAt(0) != '-') break;
			// A bare "-" has no option letter; '\0' sends it to the default branch.
			char flag = option.length() > 1 ? option.charAt(1) : '\0';
			++i;
			switch(flag)
			{
				case 'l': lower = Double.parseDouble(option_value(argv, i)); break;
				case 'u': upper = Double.parseDouble(option_value(argv, i)); break;
				case 'y':
					y_lower = Double.parseDouble(option_value(argv, i));
					++i;
					y_upper = Double.parseDouble(option_value(argv, i));
					y_scaling = true;
					break;
				case 's': save_filename = option_value(argv, i); break;
				case 'r': restore_filename = option_value(argv, i); break;
				default:
					System.err.println("unknown option");
					exit_with_help();
			}
		}

		if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
		{
			System.err.println("inconsistent lower/upper specification");
			System.exit(1);
		}
		if(restore_filename != null && save_filename != null)
		{
			System.err.println("cannot use -r and -s simultaneously");
			System.exit(1);
		}

		// exactly one argument, the data file, must follow the options
		if(argv.length != i+1)
			exit_with_help();

		String data_filename = argv[i];
		BufferedReader fp = open_or_exit(data_filename);
		try
		{
			/* assumption: min index of attributes is 1 */
			/* pass 1: find out max index of attributes */
			max_index = 0;
			if(restore_filename != null)
				max_index = restore_max_index(restore_filename);
			count_indices(fp);

			try
			{
				feature_max = new double[(max_index+1)];
				feature_min = new double[(max_index+1)];
			}
			catch(OutOfMemoryError e)
			{
				System.err.println("can't allocate enough memory");
				System.exit(1);
			}
			Arrays.fill(feature_max, -Double.MAX_VALUE);
			Arrays.fill(feature_min, Double.MAX_VALUE);

			/* pass 2: find out min/max value */
			fp = rewind(fp, data_filename);
			scan_ranges(fp);

			/* pass 2.5: save/restore feature_min/feature_max */
			fp = rewind(fp, data_filename);
			if(restore_filename != null)
				restore_parameters(restore_filename);
			if(save_filename != null)
				save_parameters(save_filename);

			/* pass 3: scale */
			scale(fp);

			if(new_num_nonzeros > num_nonzeros)
				System.err.print(
				 "WARNING: original #nonzeros " + num_nonzeros+"\n"
				+"         new      #nonzeros " + new_num_nonzeros+"\n"
				+"Use -l 0 if many original feature values are zeros\n");
		}
		finally
		{
			fp.close();
		}
	}

	/**
	 * Program entry point.
	 *
	 * @param argv command-line arguments, see the usage message
	 * @throws IOException if reading the data or a parameter file fails
	 */
	public static void main(String argv[]) throws IOException
	{
		svm_scale s = new svm_scale();
		s.run(argv);
	}
}
|
Reference in New Issue
Block a user