First commit

This commit is contained in:
2025-06-22 00:31:33 +02:00
parent a52c20d1fb
commit 4bdbcad256
110 changed files with 31991 additions and 1 deletions

View File

@@ -0,0 +1,41 @@
# This Makefile is used under Linux
#
# Builds the four MEX files of the LIBLINEAR MATLAB interface by calling
# MATLAB's `mex` driver. Override MATLABDIR on the command line if MATLAB is
# not installed in /usr/local/matlab, e.g. `make MATLABDIR=/opt/matlab`.

MATLABDIR ?= /usr/local/matlab
CXX ?= g++
#CXX = g++-3.3
CC ?= gcc
CFLAGS = -Wall -Wconversion -O3 -fPIC -I$(MATLABDIR)/extern/include -I..

MEX = $(MATLABDIR)/bin/mex
# The C sources are deliberately compiled with the C++ compiler as well.
MEX_OPTION = CC="$(CXX)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CFLAGS)"
# comment the following line if you use MATLAB on a 32-bit computer
MEX_OPTION += -largeArrayDims
# Expanded once (:=) so that mexext is not re-run for every reference.
MEX_EXT := $(shell $(MATLABDIR)/bin/mexext)

# Sources and headers shared by the train and predict MEX files.
LIBLINEAR_SRC = ../newton.cpp ../linear.cpp linear_model_matlab.c \
	../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
LIBLINEAR_HDR = ../linear.h linear_model_matlab.h

# These names are commands, not files; declare them so that a stray file
# called e.g. `clean` or `binary` cannot mask the target.
.PHONY: all matlab octave binary clean

all: matlab

matlab: binary

octave:
	@echo "please type make under Octave"

binary: train.$(MEX_EXT) predict.$(MEX_EXT) libsvmread.$(MEX_EXT) libsvmwrite.$(MEX_EXT)

train.$(MEX_EXT): train.c $(LIBLINEAR_HDR) $(LIBLINEAR_SRC)
	$(MEX) $(MEX_OPTION) train.c $(LIBLINEAR_SRC)

predict.$(MEX_EXT): predict.c $(LIBLINEAR_HDR) $(LIBLINEAR_SRC)
	$(MEX) $(MEX_OPTION) predict.c $(LIBLINEAR_SRC)

libsvmread.$(MEX_EXT): libsvmread.c
	$(MEX) $(MEX_OPTION) libsvmread.c

libsvmwrite.$(MEX_EXT): libsvmwrite.c
	$(MEX) $(MEX_OPTION) libsvmwrite.c

clean:
	rm -f *~ *.o *.mex* *.obj

205
liblinear-2.49/matlab/README Executable file
View File

@@ -0,0 +1,205 @@
--------------------------------------------
--- MATLAB/OCTAVE interface of LIBLINEAR ---
--------------------------------------------
Table of Contents
=================
- Introduction
- Installation
- Usage
- Returned Model Structure
- Other Utilities
- Examples
- Additional Information
Introduction
============
This tool provides a simple interface to LIBLINEAR, a library for
large-scale regularized linear classification and regression
(http://www.csie.ntu.edu.tw/~cjlin/liblinear). It is very easy to use
as the usage and the way of specifying parameters are the same as that
of LIBLINEAR.
Installation
============
On Windows systems, starting from version 2.48, we no longer provide
pre-built mex files. If you would like to build the package, please
rely on the following steps.
We recommend using make.m on both MATLAB and OCTAVE. Just type 'make'
to build 'libsvmread.mex', 'libsvmwrite.mex', 'train.mex', and
'predict.mex'.
On MATLAB or Octave:
>> make
If make.m does not work on MATLAB (especially for Windows), try 'mex
-setup' to choose a suitable compiler for mex. Make sure your compiler
is accessible and workable. Then type 'make' to do the installation.
Example:
matlab>> mex -setup
MATLAB will choose the default compiler. If you have multiple compilers,
a list is given and you can choose one from the list. For more details,
please check the following page:
https://www.mathworks.com/help/matlab/matlab_external/choose-c-or-c-compilers.html
On Windows, make.m has been tested via using Visual C++.
On Unix systems, if neither make.m nor 'mex -setup' works, please use
Makefile and type 'make' in a command window. Note that we assume
your MATLAB is installed in '/usr/local/matlab'. If not, please change
MATLABDIR in Makefile.
Example:
linux> make
To use octave, type 'make octave':
Example:
linux> make octave
For a list of supported/compatible compilers for MATLAB, please check
the following page:
http://www.mathworks.com/support/compilers/current_release/
Usage
=====
matlab> model = train(training_label_vector, training_instance_matrix [,'liblinear_options', 'col']);
-training_label_vector:
An m by 1 vector of training labels. (type must be double)
-training_instance_matrix:
An m by n matrix of m training instances with n features.
It must be a sparse matrix. (type must be double)
-liblinear_options:
A string of training options in the same format as that of LIBLINEAR.
-col:
if 'col' is set, each column of training_instance_matrix is a data instance. Otherwise each row is a data instance.
matlab> [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model [, 'liblinear_options', 'col']);
matlab> [predicted_label] = predict(testing_label_vector, testing_instance_matrix, model [, 'liblinear_options', 'col']);
-testing_label_vector:
An m by 1 vector of prediction labels. If labels of test
data are unknown, simply use any random values. (type must be double)
-testing_instance_matrix:
An m by n matrix of m testing instances with n features.
It must be a sparse matrix. (type must be double)
-model:
The output of train.
-liblinear_options:
A string of testing options in the same format as that of LIBLINEAR.
-col:
if 'col' is set, each column of testing_instance_matrix is a data instance. Otherwise each row is a data instance.
Returned Model Structure
========================
The 'train' function returns a model which can be used for future
prediction. It is a structure and is organized as [Parameters, nr_class,
nr_feature, bias, Label, w, rho]:
-Parameters: Parameters (now only solver type is provided)
-nr_class: number of classes; = 2 for regression
-nr_feature: number of features in training data (without including the bias term)
-bias: If >= 0, we assume one additional feature is added to the end
of each data instance.
-Label: label of each class; empty for regression
-w: a nr_w-by-n matrix for the weights, where n is nr_feature
or nr_feature+1 depending on the existence of the bias term.
nr_w is 1 if nr_class=2 and -s is not 4 (i.e., not
multi-class svm by Crammer and Singer). It is
nr_class otherwise.
-rho: the bias term of one-class SVM.
If the '-v' option is specified, cross validation is conducted and the
returned model is just a scalar: cross-validation accuracy for
classification and mean-squared error for regression.
If the '-C' option is specified, best parameters are found by cross
validation. The parameter selection utility is supported only by -s 0,
-s 2 (for finding C) and -s 11 (for finding C, p). The returned
model is a three dimensional vector with the best C, the best p, and
the corresponding cross-validation accuracy or mean squared error. The
returned best p for -s 0 and -s 2 is set to -1 because the p parameter
is not used by classification models.
Result of Prediction
====================
The function 'predict' has three outputs. The first one,
predicted_label, is a vector of predicted labels. The second output,
accuracy, is a vector including accuracy (for classification), mean
squared error, and squared correlation coefficient (for regression).
The third is a matrix containing decision values or probability
estimates (if '-b 1' is specified). If k is the number of classes
and k' is the number of classifiers (k'=1 if k=2, otherwise k'=k), for decision values,
each row includes results of k' binary linear classifiers. For probabilities,
each row contains k values indicating the probability that the testing instance is in
each class. Note that the order of classes here is the same as 'Label'
field in the model structure.
Other Utilities
===============
A matlab function libsvmread reads files in LIBSVM format:
[label_vector, instance_matrix] = libsvmread('data.txt');
Two outputs are labels and instances, which can then be used as inputs
of train or predict.
A matlab function libsvmwrite writes Matlab matrix to a file in LIBSVM format:
libsvmwrite('data.txt', label_vector, instance_matrix)
The instance_matrix must be a sparse matrix. (type must be double)
For windows, `libsvmread.mexw64' and `libsvmwrite.mexw64' are ready in
the directory `..\windows'.
These codes are prepared by Rong-En Fan and Kai-Wei Chang from National
Taiwan University.
Examples
========
Train and test on the provided data heart_scale:
matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
matlab> model = train(heart_scale_label, heart_scale_inst, '-c 1');
matlab> [predict_label, accuracy, dec_values] = predict(heart_scale_label, heart_scale_inst, model); % test the training data
Note that for testing, you can put anything in the testing_label_vector.
For probability estimates, you need '-b 1' only in the testing phase:
matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
Use the best parameter to train (C for -s 0, 2 and C, p for -s 11):
matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
Additional Information
======================
Please cite LIBLINEAR as follows
R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin.
LIBLINEAR: A Library for Large Linear Classification, Journal of
Machine Learning Research 9(2008), 1871-1874. Software available at
http://www.csie.ntu.edu.tw/~cjlin/liblinear
For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>.

View File

@@ -0,0 +1,212 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include "mex.h"
#ifdef MX_API_VER
#if MX_API_VER < 0x07030000
typedef int mwIndex;
#endif
#endif
#ifndef max
#define max(x,y) (((x)>(y))?(x):(y))
#endif
#ifndef min
#define min(x,y) (((x)<(y))?(x):(y))
#endif
/* Print the one-line calling convention of libsvmread to the MATLAB console. */
void exit_with_help()
{
	mexPrintf("Usage: [label_vector, instance_matrix] = libsvmread('filename');\n");
}
/*
 * Give every requested output a value so the MEX call can return cleanly:
 * each of the nlhs slots of plhs receives an empty 0-by-0 real double matrix.
 */
static void fake_answer(int nlhs, mxArray *plhs[])
{
	int slot = 0;

	while(slot < nlhs)
	{
		plhs[slot] = mxCreateDoubleMatrix(0, 0, mxREAL);
		++slot;
	}
}
static char *line;        /* shared, growable buffer that readline() fills */
static int max_line_len;  /* current capacity of `line`, in bytes */

/*
 * Read one complete line from `input` into the shared buffer `line`,
 * doubling the buffer until the whole line (including its '\n', if the file
 * has one) fits.
 *
 * The caller must have set `line` to a malloc'ed buffer of `max_line_len`
 * bytes beforehand.
 *
 * Returns `line` on success. Returns NULL when nothing could be read (end of
 * file or read error) or when the buffer cannot be enlarged; in the latter
 * case the old buffer stays valid and is still owned by `line`.
 */
static char* readline(FILE *input)
{
	size_t len;
	char *grown;

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	while(strrchr(line,'\n') == NULL)
	{
		/* doubling beyond this value would overflow a 32-bit int */
		if(max_line_len > 0x3fffffff)
			return NULL;

		/* keep the old pointer until realloc is known to have succeeded */
		grown = (char *) realloc(line, (size_t) max_line_len * 2);
		if(grown == NULL)
			return NULL;
		line = grown;
		max_line_len *= 2;

		len = strlen(line);
		if(fgets(line+len,max_line_len-(int) len,input) == NULL)
			break;	/* last line of the file has no trailing newline */
	}
	return line;
}
/*
 * Error exit shared by all failure paths of read_problem(): close the input
 * file, release the shared line buffer and return empty matrices to MATLAB.
 */
static void read_problem_abort(FILE *fp, int nlhs, mxArray *plhs[])
{
	fclose(fp);
	free(line);
	line = NULL;
	fake_answer(nlhs, plhs);
}

// read in a problem (in libsvm format)
//
// The file is scanned twice. The first pass validates the feature indices
// and counts instances (l), non-zero entries (elements) and the index range.
// The second pass fills plhs[0] (labels, l-by-1) and a sparse matrix holding
// one instance per column, which is finally transposed into plhs[1].
// On any error a message is printed and every output is set to an empty
// matrix; the file and the line buffer are released on all paths.
void read_problem(const char *filename, int nlhs, mxArray *plhs[])
{
	int max_index, min_index, inst_max_index;
	size_t elements, k, i, l=0;
	FILE *fp = fopen(filename,"r");
	char *endptr;
	mwIndex *ir, *jc;
	double *labels, *samples;

	if(fp == NULL)
	{
		mexPrintf("can't open input file %s\n",filename);
		fake_answer(nlhs, plhs);
		return;
	}

	max_line_len = 1024;
	line = (char *) malloc(max_line_len*sizeof(char));
	if(line == NULL)
	{
		mexPrintf("Error: out of memory\n");
		read_problem_abort(fp, nlhs, plhs);
		return;
	}

	max_index = 0;
	min_index = 1; // our index starts from 1
	elements = 0;
	while(readline(fp) != NULL)
	{
		char *idx, *val;
		// features
		int index = 0;

		inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
		strtok(line," \t"); // label
		while (1)
		{
			idx = strtok(NULL,":"); // index:value
			val = strtok(NULL," \t");
			if(val == NULL)
				break;

			errno = 0;
			index = (int) strtol(idx,&endptr,10);
			// indices must be integers in strictly increasing order within a line
			if(endptr == idx || errno != 0 || *endptr != '\0' || index <= inst_max_index)
			{
				mexPrintf("Wrong input format at line %lu\n",(unsigned long)(l+1));
				read_problem_abort(fp, nlhs, plhs);
				return;
			}
			else
				inst_max_index = index;

			min_index = min(min_index, index);
			elements++;
		}
		max_index = max(max_index, inst_max_index);
		l++;
	}
	rewind(fp);

	// y
	plhs[0] = mxCreateDoubleMatrix(l, 1, mxREAL);
	// x^T
	if (min_index <= 0)
		plhs[1] = mxCreateSparse(max_index-min_index+1, l, elements, mxREAL);
	else
		plhs[1] = mxCreateSparse(max_index, l, elements, mxREAL);

	labels = mxGetPr(plhs[0]);
	samples = mxGetPr(plhs[1]);
	ir = mxGetIr(plhs[1]);
	jc = mxGetJc(plhs[1]);

	k=0;
	for(i=0;i<l;i++)
	{
		char *idx, *val, *label;
		jc[i] = k;

		// the first pass counted l lines, so a short read here means the
		// input could not be read again consistently
		if(readline(fp) == NULL)
		{
			mexPrintf("Wrong input format at line %lu\n",(unsigned long)(i+1));
			read_problem_abort(fp, nlhs, plhs);
			return;
		}

		label = strtok(line," \t\n");
		if(label == NULL)
		{
			mexPrintf("Empty line at line %lu\n",(unsigned long)(i+1));
			read_problem_abort(fp, nlhs, plhs);
			return;
		}
		labels[i] = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
		{
			mexPrintf("Wrong input format at line %lu\n",(unsigned long)(i+1));
			read_problem_abort(fp, nlhs, plhs);
			return;
		}

		// features
		while(1)
		{
			idx = strtok(NULL,":");
			val = strtok(NULL," \t");
			if(val == NULL)
				break;

			ir[k] = (mwIndex) (strtol(idx,&endptr,10) - min_index); // precomputed kernel has <index> start from 0

			errno = 0;
			samples[k] = strtod(val,&endptr);
			if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
			{
				mexPrintf("Wrong input format at line %lu\n",(unsigned long)(i+1));
				read_problem_abort(fp, nlhs, plhs);
				return;
			}
			++k;
		}
	}
	jc[l] = k;

	fclose(fp);
	free(line);
	line = NULL;

	// plhs[1] holds one instance per column; MATLAB callers expect one per row
	{
		mxArray *rhs[1], *lhs[1];
		rhs[0] = plhs[1];
		if(mexCallMATLAB(1, lhs, 1, rhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose problem\n");
			fake_answer(nlhs, plhs);
			return;
		}
		plhs[1] = lhs[0];
	}
}
/*
 * MEX entry point of libsvmread.
 *
 * Expects exactly one input (the file name) and exactly two outputs. On a
 * wrong call, or when the name cannot be copied into the local buffer, a
 * message is printed and all outputs are set to empty matrices; otherwise
 * the work is delegated to read_problem().
 */
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[] )
{
	#define filename_size 256

	char filename[filename_size];
	int bad_call = (nrhs != 1) || (nlhs != 2);

	if(bad_call)
	{
		exit_with_help();
		fake_answer(nlhs, plhs);
		return;
	}

	/* mxGetString reports 1 when the argument is no string or does not fit */
	if(mxGetString(prhs[0], filename, filename_size) != 1)
	{
		read_problem(filename, nlhs, plhs);
		return;
	}

	mexPrintf("Error: wrong or too long filename\n");
	fake_answer(nlhs, plhs);
}

View File

@@ -0,0 +1,119 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mex.h"
#ifdef MX_API_VER
#if MX_API_VER < 0x07030000
typedef int mwIndex;
#endif
#endif
/* Print the one-line calling convention of libsvmwrite to the MATLAB console. */
void exit_with_help()
{
	mexPrintf("Usage: libsvmwrite('filename', label_vector, instance_matrix);\n");
}
/*
 * Assign an empty 0-by-0 real double matrix to each of the nlhs output
 * slots, so that MATLAB receives a value for every requested output.
 */
static void fake_answer(int nlhs, mxArray *plhs[])
{
	int slot;

	for(slot = nlhs - 1; slot >= 0; --slot)
		plhs[slot] = mxCreateDoubleMatrix(0, 0, mxREAL);
}
/*
 * Write a label vector and a sparse instance matrix to `filename` in LIBSVM
 * text format: one line per instance, "<label> <index>:<value> ...", with
 * 1-based feature indices.
 *
 * instance_mat holds one instance per row; it is transposed first so that
 * each instance becomes one column of the compressed-column storage.
 * Errors are reported with mexPrintf. The output file is closed and the
 * temporary matrices are destroyed on every path.
 */
void libsvmwrite(const char *filename, const mxArray *label_vec, const mxArray *instance_mat)
{
	FILE *fp = fopen(filename,"w");
	mwIndex *ir, *jc, k, low, high;
	size_t i, l, label_vector_row_num;
	double *samples, *labels;
	mxArray *instance_mat_col; // instance sparse matrix in column format

	if(fp ==NULL)
	{
		mexPrintf("can't open output file %s\n",filename);
		return;
	}

	// transpose instance matrix
	{
		mxArray *prhs[1], *plhs[1];
		int failed;

		prhs[0] = mxDuplicateArray(instance_mat);
		failed = mexCallMATLAB(1, plhs, 1, prhs, "transpose");
		mxDestroyArray(prhs[0]);	// the copy is not needed any more, on success or failure
		if(failed)
		{
			mexPrintf("Error: cannot transpose instance matrix\n");
			fclose(fp);
			return;
		}
		instance_mat_col = plhs[0];
	}

	// the number of instance
	l = mxGetN(instance_mat_col);
	label_vector_row_num = mxGetM(label_vec);

	if(label_vector_row_num!=l)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		mxDestroyArray(instance_mat_col);
		fclose(fp);
		return;
	}

	// each column is one instance
	labels = mxGetPr(label_vec);
	samples = mxGetPr(instance_mat_col);
	ir = mxGetIr(instance_mat_col);
	jc = mxGetJc(instance_mat_col);

	for(i=0;i<l;i++)
	{
		fprintf(fp,"%.17g", labels[i]);

		low = jc[i], high = jc[i+1];
		for(k=low;k<high;k++)
			// %lu expects unsigned long, which is not size_t on every platform
			fprintf(fp," %lu:%g", (unsigned long)(ir[k]+1), samples[k]);

		fprintf(fp,"\n");
	}

	mxDestroyArray(instance_mat_col);
	fclose(fp);
	return;
}
/*
 * MEX entry point of libsvmwrite('filename', label_vector, instance_matrix).
 *
 * No output may be requested. Exactly three inputs are required: a file
 * name, a double label vector and a sparse double instance matrix.
 * Any violation prints a message and returns without writing.
 */
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[] )
{
	if(nlhs > 0)
	{
		exit_with_help();
		fake_answer(nlhs, plhs);
		return;
	}

	// Transform the input Matrix to libsvm format
	if(nrhs == 3)
	{
		char filename[256];
		if(!mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2]))
		{
			mexPrintf("Error: label vector and instance matrix must be double\n");
			return;
		}

		// Bound the copy by the buffer size, not by the length of the
		// argument, so that an over-long name cannot overflow `filename`;
		// mxGetString reports failure for non-strings and for truncation.
		if(mxGetString(prhs[0], filename, sizeof(filename)) != 0)
		{
			mexPrintf("Error: wrong or too long filename\n");
			return;
		}

		if(mxIsSparse(prhs[2]))
			libsvmwrite(filename, prhs[1], prhs[2]);
		else
		{
			mexPrintf("Instance_matrix must be sparse\n");
			return;
		}
	}
	else
	{
		exit_with_help();
		return;
	}
}

View File

@@ -0,0 +1,190 @@
#include <stdlib.h>
#include <string.h>
#include "linear.h"
#include "mex.h"
#ifdef MX_API_VER
#if MX_API_VER < 0x07030000
typedef int mwIndex;
#endif
#endif
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define NUM_OF_RETURN_FIELD 7
/*
 * Field names of the MATLAB model struct, in the order in which
 * model_to_matlab_structure() fills them and matlab_matrix_to_model() reads
 * them back (by position). There must be NUM_OF_RETURN_FIELD entries.
 */
static const char *field_names[] = {
"Parameters",
"nr_class",
"nr_feature",
"bias",
"Label",
"w",
"rho",
};
/*
 * Convert a LIBLINEAR model into a 1-by-1 MATLAB struct stored in plhs[0].
 *
 * The struct has the fields listed in field_names: Parameters (the solver
 * type), nr_class, nr_feature, bias, Label (empty when the model has no
 * labels), w (nr_w-by-n weights) and rho.
 *
 * Each field array is handed to the struct directly instead of being
 * duplicated, so the weight matrix exists only once and no temporary array
 * is left behind.
 *
 * Always returns NULL (no error is reported by this function).
 */
const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_)
{
	int i;
	int nr_w;
	double *ptr;
	mxArray *return_model;
	mxArray *fields[NUM_OF_RETURN_FIELD];
	int out_id = 0;
	int w_size;
	size_t w_index, w_total;

	// Parameters
	// for now, only solver_type is needed
	fields[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(fields[out_id]);
	ptr[0] = model_->param.solver_type;
	out_id++;

	// nr_class
	fields[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(fields[out_id]);
	ptr[0] = model_->nr_class;
	out_id++;

	// two-class models keep a single weight vector, except for Crammer-Singer
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	// nr_feature
	fields[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(fields[out_id]);
	ptr[0] = model_->nr_feature;
	out_id++;

	// bias
	fields[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(fields[out_id]);
	ptr[0] = model_->bias;
	out_id++;

	// a non-negative bias adds one extra weight per vector
	if(model_->bias>=0)
		w_size = model_->nr_feature+1;
	else
		w_size = model_->nr_feature;

	// Label
	if(model_->label)
	{
		fields[out_id] = mxCreateDoubleMatrix(model_->nr_class, 1, mxREAL);
		ptr = mxGetPr(fields[out_id]);
		for(i = 0; i < model_->nr_class; i++)
			ptr[i] = model_->label[i];
	}
	else
		fields[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL);
	out_id++;

	// w: copied element by element in storage order
	fields[out_id] = mxCreateDoubleMatrix(nr_w, w_size, mxREAL);
	ptr = mxGetPr(fields[out_id]);
	w_total = (size_t) w_size * (size_t) nr_w;	// computed in size_t to avoid int overflow
	for(w_index = 0; w_index < w_total; w_index++)
		ptr[w_index]=model_->w[w_index];
	out_id++;

	// rho
	fields[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(fields[out_id]);
	ptr[0] = model_->rho;
	out_id++;

	/* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */
	return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names);

	/* Fill struct matrix; the struct takes ownership of each array */
	for(i = 0; i < NUM_OF_RETURN_FIELD; i++)
		mxSetField(return_model,0,field_names[i],fields[i]);

	/* return */
	plhs[0] = return_model;

	return NULL;
}
const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct)
{
int i, num_of_fields;
int nr_w;
double *ptr;
int id = 0;
int n, w_size;
mxArray **rhs;
num_of_fields = mxGetNumberOfFields(matlab_struct);
rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields);
for(i=0;i<num_of_fields;i++)
rhs[i] = mxGetFieldByNumber(matlab_struct, 0, i);
model_->nr_class=0;
nr_w=0;
model_->nr_feature=0;
model_->w=NULL;
model_->label=NULL;
// Parameters
ptr = mxGetPr(rhs[id]);
model_->param.solver_type = (int)ptr[0];
id++;
// nr_class
ptr = mxGetPr(rhs[id]);
model_->nr_class = (int)ptr[0];
id++;
if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
nr_w=1;
else
nr_w=model_->nr_class;
// nr_feature
ptr = mxGetPr(rhs[id]);
model_->nr_feature = (int)ptr[0];
id++;
// bias
ptr = mxGetPr(rhs[id]);
model_->bias = ptr[0];
id++;
if(model_->bias>=0)
n=model_->nr_feature+1;
else
n=model_->nr_feature;
w_size = n;
// Label
if(mxIsEmpty(rhs[id]) == 0)
{
model_->label = Malloc(int, model_->nr_class);
ptr = mxGetPr(rhs[id]);
for(i=0;i<model_->nr_class;i++)
model_->label[i] = (int)ptr[i];
}
id++;
// w
ptr = mxGetPr(rhs[id]);
model_->w=Malloc(double, w_size*nr_w);
for(i = 0; i < w_size*nr_w; i++)
model_->w[i]=ptr[i];
id++;
// rho
ptr = mxGetPr(rhs[id]);
model_->rho = ptr[0];
id++;
mxFree(rhs);
return NULL;
}

View File

@@ -0,0 +1,2 @@
/*
 * Store a MATLAB struct describing *model_ in plhs[0].
 * The implementation in linear_model_matlab.c always returns NULL.
 */
const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_);
/*
 * Fill *model_ from a MATLAB model struct.
 * Callers treat a non-NULL return value as an error message.
 */
const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct);

View File

@@ -0,0 +1,22 @@
% This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix
function make()
% MAKE  Compile libsvmread, libsvmwrite, train and predict with mex.
%   Under MATLAB every file is built with -largeArrayDims; under OCTAVE the
%   flag is left out. A failure is reported on the console instead of being
%   rethrown.
try
	% sources linked into both train and predict
	shared_src = {'linear_model_matlab.c', '../linear.cpp', '../newton.cpp', ...
		'../blas/daxpy.c', '../blas/ddot.c', '../blas/dnrm2.c', '../blas/dscal.c'};

	if(exist('OCTAVE_VERSION', 'builtin'))
		% This part is for OCTAVE
		dims_flag = {};
	else
		% This part is for MATLAB
		% Add -largeArrayDims on 64-bit machines of MATLAB
		dims_flag = {'-largeArrayDims'};
	end

	mex(dims_flag{:}, 'libsvmread.c');
	mex(dims_flag{:}, 'libsvmwrite.c');
	mex('-I..', dims_flag{:}, 'train.c', shared_src{:});
	mex('-I..', dims_flag{:}, 'predict.c', shared_src{:});
catch err
	fprintf('Error: %s failed (line %d)\n', err.stack(1).file, err.stack(1).line);
	disp(err.message);
	fprintf('=> Please check README for detailed instructions.\n');
end

View File

@@ -0,0 +1,341 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "linear.h"
#include "mex.h"
#include "linear_model_matlab.h"
#ifdef MX_API_VER
#if MX_API_VER < 0x07030000
typedef int mwIndex;
#endif
#endif
#define CMD_LEN 2048
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
// printf-like sink that ignores its arguments; mexFunction installs it as `info` for the -q option
int print_null(const char *s,...) {return 0;}
// printf-style function used by do_predict to report accuracy / error figures;
// mexFunction points it at mexPrintf by default
int (*info)(const char *fmt,...);
// set to 1 by mexFunction when the fifth argument is 'col' (each column of the
// instance matrix is one instance), 0 otherwise
int col_format_flag;
void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias)
{
int j;
mwIndex *ir, *jc, low, high, i;
double *samples;
ir = mxGetIr(prhs);
jc = mxGetJc(prhs);
samples = mxGetPr(prhs);
// each column is one instance
j = 0;
low = jc[index], high = jc[index+1];
for(i=low; i<high && (int) (ir[i])<feature_number; i++)
{
x[j].index = (int) ir[i]+1;
x[j].value = samples[i];
j++;
}
if(bias>=0)
{
x[j].index = feature_number+1;
x[j].value = bias;
j++;
}
x[j].index = -1;
}
/*
 * Return placeholders to MATLAB: every one of the nlhs outputs becomes an
 * empty 0-by-0 real double matrix.
 */
static void fake_answer(int nlhs, mxArray *plhs[])
{
	int slot = 0;

	while(slot < nlhs)
	{
		plhs[slot] = mxCreateDoubleMatrix(0, 0, mxREAL);
		++slot;
	}
}
/*
 * Predict every instance of the sparse matrix prhs[1] with model_ and fill
 * the outputs requested by the caller.
 *
 *   prhs[0]  label vector (one column, one entry per instance)
 *   prhs[1]  sparse instance matrix; one instance per row, or per column
 *            when col_format_flag is set
 *   plhs[0]  predicted labels
 *   plhs[1]  [accuracy; mean squared error; squared correlation coefficient]
 *   plhs[2]  probability estimates (predict_probability_flag set) or
 *            decision values
 *
 * plhs[1] and plhs[2] are only returned when nlhs is 3. On invalid input a
 * message is printed and all outputs become empty matrices. Temporary
 * arrays and buffers are released before returning.
 */
void do_predict(int nlhs, mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	double squared_corr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format
	mxArray *tplhs[3]; // temporary storage for plhs[]

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	int nr_w;
	double *scores=NULL;	// per-instance probabilities or decision values

	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w=1;
	else
		nr_w=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(nlhs, plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(nlhs, plhs);
		return;
	}

	ptr_label = mxGetPr(prhs[0]);

	// transpose instance matrix
	if(col_format_flag)
		pplhs[0] = (mxArray *)prhs[1];
	else
	{
		mxArray *pprhs[1];
		int failed;

		pprhs[0] = mxDuplicateArray(prhs[1]);
		failed = mexCallMATLAB(1, pplhs, 1, pprhs, "transpose");
		mxDestroyArray(pprhs[0]);	// the copy is only needed for the call
		if(failed)
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(nlhs, plhs);
			return;
		}
	}

	// one scratch buffer for all instances instead of one allocation per instance
	scores = Malloc(double, nr_class);
	x = Malloc(struct feature_node, feature_number+2);
	if(x == NULL || (scores == NULL && nr_class > 0))
	{
		free(scores);
		free(x);
		if(!col_format_flag)
			mxDestroyArray(pplhs[0]);
		mexPrintf("Error: out of memory\n");
		fake_answer(nlhs, plhs);
		return;
	}

	tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	ptr_predict_label = mxGetPr(tplhs[0]);
	ptr_prob_estimates = mxGetPr(tplhs[2]);
	ptr_dec_values = mxGetPr(tplhs[2]);

	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			predict_label = predict_probability(model_, x, scores);
			ptr_predict_label[instance_index] = predict_label;
			// output matrices are column-major: row = instance, column = class
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = scores[i];
		}
		else
		{
			predict_label = predict_values(model_, x, scores);
			ptr_predict_label[instance_index] = predict_label;
			for(i=0;i<nr_w;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = scores[i];
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;

		++total;
	}

	squared_corr = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	if(check_regression_model(model_))
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n", squared_corr);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);

	// return accuracy, mean squared error, squared correlation coefficient
	tplhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(tplhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = squared_corr;

	free(x);
	free(scores);
	if(!col_format_flag)
		mxDestroyArray(pplhs[0]);	// the transposed copy belongs to this function

	switch(nlhs)
	{
		case 3:
			plhs[2] = tplhs[2];
			plhs[1] = tplhs[1];
			/* fall through */
		case 1:
		case 0:
			plhs[0] = tplhs[0];
	}

	// outputs that were computed but not requested are not handed to MATLAB
	if(nlhs != 3)
	{
		mxDestroyArray(tplhs[2]);
		mxDestroyArray(tplhs[1]);
	}
}
/* Print the usage text of predict, one stored line at a time. */
void exit_with_help()
{
	static const char *const help_lines[] = {
		"Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n",
		" [predicted_label] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n",
		"liblinear_options:\n",
		"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n",
		"-q quiet mode (no outputs)\n",
		"col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n",
		"Returns:\n",
		" predicted_label: prediction output vector.\n",
		" accuracy: a vector with accuracy, mean squared error, squared correlation coefficient.\n",
		" prob_estimates: If selected, probability estimate vector.\n",
	};
	int n_lines = (int) (sizeof(help_lines) / sizeof(help_lines[0]));
	int k;

	for(k = 0; k < n_lines; k++)
		mexPrintf("%s", help_lines[k]);
}
/*
 * MEX entry point of predict.
 *
 *   prhs[0]  label vector (double)
 *   prhs[1]  sparse instance matrix (double)
 *   prhs[2]  model struct
 *   prhs[3]  optional option string ("-b 0|1", "-q")
 *   prhs[4]  optional 'col' to treat each column as an instance
 *
 * Only 0, 1 or 3 outputs are accepted. All strings are copied with the size
 * of the local buffer as limit, so over-long arguments are rejected instead
 * of overflowing `cmd`.
 */
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[] )
{
	int prob_estimate_flag = 0;
	struct model *model_;
	char cmd[CMD_LEN];
	info = &mexPrintf;
	col_format_flag = 0;

	if(nlhs == 2 || nlhs > 3 || nrhs > 5 || nrhs < 3)
	{
		exit_with_help();
		fake_answer(nlhs, plhs);
		return;
	}
	if(nrhs == 5)
	{
		// an argument that is no string or does not fit cannot be "col"
		if(mxGetString(prhs[4], cmd, CMD_LEN) == 0 && strcmp(cmd, "col") == 0)
		{
			col_format_flag = 1;
		}
	}

	if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
		mexPrintf("Error: label vector and instance matrix must be double\n");
		fake_answer(nlhs, plhs);
		return;
	}

	if(mxIsStruct(prhs[2]))
	{
		const char *error_msg;

		// parse options
		if(nrhs>=4)
		{
			int i, argc = 1;
			// a CMD_LEN-byte string holds at most CMD_LEN/2 tokens; one more
			// slot is needed for index 0 and one for the terminating NULL
			char *argv[CMD_LEN/2 + 2];

			// put options in argv[]
			if(mxGetString(prhs[3], cmd, CMD_LEN) != 0)
			{
				if(mxIsEmpty(prhs[3]))
					cmd[0] = '\0';	// an empty argument means "no options"
				else
				{
					mexPrintf("Error: liblinear_options must be a string of fewer than %d characters\n", CMD_LEN);
					fake_answer(nlhs, plhs);
					return;
				}
			}
			if((argv[argc] = strtok(cmd, " ")) != NULL)
				while((argv[++argc] = strtok(NULL, " ")) != NULL)
					;

			for(i=1;i<argc;i++)
			{
				if(argv[i][0] != '-') break;
				++i;
				// every option except -q needs a value
				if(i>=argc && argv[i-1][1] != 'q')
				{
					exit_with_help();
					fake_answer(nlhs, plhs);
					return;
				}
				switch(argv[i-1][1])
				{
					case 'b':
						prob_estimate_flag = atoi(argv[i]);
						break;
					case 'q':
						info = &print_null;
						i--;	// -q has no value; step back onto it
						break;
					default:
						mexPrintf("unknown option\n");
						exit_with_help();
						fake_answer(nlhs, plhs);
						return;
				}
			}
		}

		model_ = Malloc(struct model, 1);
		if(model_ == NULL)
		{
			mexPrintf("Error: out of memory\n");
			fake_answer(nlhs, plhs);
			return;
		}
		error_msg = matlab_matrix_to_model(model_, prhs[2]);
		if(error_msg)
		{
			mexPrintf("Error: can't read model: %s\n", error_msg);
			free_and_destroy_model(&model_);
			fake_answer(nlhs, plhs);
			return;
		}

		if(prob_estimate_flag)
		{
			if(!check_probability_model(model_))
			{
				mexPrintf("probability output is only supported for logistic regression\n");
				prob_estimate_flag=0;
			}
		}

		if(mxIsSparse(prhs[1]))
			do_predict(nlhs, plhs, prhs, model_, prob_estimate_flag);
		else
		{
			mexPrintf("Testing_instance_matrix must be sparse; "
				"use sparse(Testing_instance_matrix) first\n");
			fake_answer(nlhs, plhs);
		}

		// destroy model_
		free_and_destroy_model(&model_);
	}
	else
	{
		mexPrintf("model file should be a struct array\n");
		fake_answer(nlhs, plhs);
	}

	return;
}

View File

@@ -0,0 +1,523 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "linear.h"
#include "mex.h"
#include "linear_model_matlab.h"
#ifdef MX_API_VER
#if MX_API_VER < 0x07030000
typedef int mwIndex;
#endif
#endif
#define CMD_LEN 2048
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL
/* Message sink that discards the text. */
void print_null(const char *s) {}
/*
 * Message sink that shows the text in the MATLAB command window.
 * The text is passed as an argument, never as the format string, so a '%'
 * inside a message is printed literally instead of being interpreted.
 */
void print_string_matlab(const char *s) {mexPrintf("%s", s);}
/*
 * Print the usage text of train: the calling convention, all
 * liblinear_options with their defaults, and the meaning of 'col'.
 */
void exit_with_help()
{
mexPrintf(
"Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
"liblinear_options:\n"
"-s type : set type of solver (default 1)\n"
" for multi-class classification\n"
" 0 -- L2-regularized logistic regression (primal)\n"
" 1 -- L2-regularized L2-loss support vector classification (dual)\n"
" 2 -- L2-regularized L2-loss support vector classification (primal)\n"
" 3 -- L2-regularized L1-loss support vector classification (dual)\n"
" 4 -- support vector classification by Crammer and Singer\n"
" 5 -- L1-regularized L2-loss support vector classification\n"
" 6 -- L1-regularized logistic regression\n"
" 7 -- L2-regularized logistic regression (dual)\n"
" for regression\n"
" 11 -- L2-regularized L2-loss support vector regression (primal)\n"
" 12 -- L2-regularized L2-loss support vector regression (dual)\n"
" 13 -- L2-regularized L1-loss support vector regression (dual)\n"
" for outlier detection\n"
" 21 -- one-class support vector machine (dual)\n"
"-c cost : set the parameter C (default 1)\n"
"-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
"-n nu : set the parameter nu of one-class SVM (default 0.5)\n"
"-e epsilon : set tolerance of termination criterion\n"
" -s 0 and 2\n"
" |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
" where f is the primal function and pos/neg are # of\n"
" positive/negative data (default 0.01)\n"
" -s 11\n"
" |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.0001)\n"
" -s 1, 3, 4, 7, and 21\n"
" Dual maximal violation <= eps; similar to libsvm (default 0.1 except 0.01 for -s 21)\n"
" -s 5 and 6\n"
" |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
" where f is the primal function (default 0.01)\n"
" -s 12 and 13\n"
" |f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
" where f is the dual function (default 0.1)\n"
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
"-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is\n"
" (for -s 0, 2, 5, 6, 11)\n"
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
"-v n: n-fold cross validation mode\n"
"-C : find parameters (C for -s 0, 2 and C, p for -s 11)\n"
"-q : quiet mode (no outputs)\n"
"col:\n"
" if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
);
}
// liblinear arguments
// File-level state shared by the functions of this MEX file; the flags and
// `bias` are reset to their defaults at the start of parse_command_line.
struct parameter param; // set by parse_command_line
struct problem prob; // set by read_problem
struct model *model_;
struct feature_node *x_space;
int flag_cross_validation; // reset to 0 by parse_command_line
int flag_find_parameters; // reset to 0 by parse_command_line
int flag_C_specified; // when nonzero, do_find_parameters starts its search from param.C
int flag_p_specified; // when nonzero, do_find_parameters starts its search from param.p
int flag_solver_specified; // reset to 0 by parse_command_line
int col_format_flag; // 1 when the fourth argument is 'col'
int nr_fold; // number of folds passed to cross_validation / find_parameters
double bias; // defaults to -1 in parse_command_line
/*
 * Run the parameter search of LIBLINEAR with nr_fold-fold cross validation
 * and print the result.
 *
 * The search starts from param.C / param.p when the corresponding value was
 * given on the command line, and from -1.0 otherwise. The best C, best p and
 * best score are written through the three output pointers.
 */
void do_find_parameters(double *best_C, double *best_p, double *best_score)
{
	double start_C = flag_C_specified ? param.C : -1.0;
	double start_p = flag_p_specified ? param.p : -1.0;

	mexPrintf("Doing parameter search with %d-fold cross validation.\n", nr_fold);
	find_parameters(&prob, &param, nr_fold, start_C, start_p, best_C, best_p, best_score);

	switch(param.solver_type)
	{
		case L2R_LR:
		case L2R_L2LOSS_SVC:
			// classification: the score is an accuracy in [0,1]
			mexPrintf("Best C = %g CV accuracy = %g%%\n", *best_C, 100.0 * (*best_score));
			break;
		case L2R_L2LOSS_SVR:
			// regression: the score is a mean squared error
			mexPrintf("Best C = %g Best p = %g CV MSE = %g\n", *best_C, *best_p, *best_score);
			break;
		default:
			break;
	}
}
/*
 * Run nr_fold-fold cross validation on the global problem and print the
 * result.
 *
 * For the three SVR solvers the mean squared error and the squared
 * correlation coefficient are printed and the mean squared error is
 * returned; for every other solver the accuracy in percent is printed and
 * returned.
 */
double do_cross_validation()
{
	int idx;
	double retval = 0.0;
	double *target = Malloc(double, prob.l);
	int is_regression;

	cross_validation(&prob,&param,nr_fold,target);

	is_regression = (param.solver_type == L2R_L2LOSS_SVR ||
		param.solver_type == L2R_L1LOSS_SVR_DUAL ||
		param.solver_type == L2R_L2LOSS_SVR_DUAL);

	if(!is_regression)
	{
		int total_correct = 0;

		for(idx = 0; idx < prob.l; idx++)
		{
			if(target[idx] == prob.y[idx])
				++total_correct;
		}
		mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
		retval = 100.0*total_correct/prob.l;
	}
	else
	{
		double total_error = 0;
		double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;

		for(idx = 0; idx < prob.l; idx++)
		{
			double y = prob.y[idx];
			double v = target[idx];

			total_error += (v-y)*(v-y);
			sumv += v;
			sumy += y;
			sumvv += v*v;
			sumyy += y*y;
			sumvy += v*y;
		}
		mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
		mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
			((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
			((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
			);
		retval = total_error/prob.l;
	}

	free(target);
	return retval;
}
// nrhs should be 3
int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
{
int i, argc = 1;
char cmd[CMD_LEN];
char *argv[CMD_LEN/2];
void (*print_func)(const char *) = print_string_matlab; // default printing to matlab display
// default values
param.solver_type = L2R_L2LOSS_SVC_DUAL;
param.C = 1;
param.p = 0.1;
param.nu = 0.5;
param.eps = INF; // see setting below
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
param.init_sol = NULL;
param.regularize_bias = 1;
flag_cross_validation = 0;
col_format_flag = 0;
flag_C_specified = 0;
flag_p_specified = 0;
flag_solver_specified = 0;
flag_find_parameters = 0;
bias = -1;
if(nrhs <= 1)
return 1;
if(nrhs == 4)
{
mxGetString(prhs[3], cmd, mxGetN(prhs[3])+1);
if(strcmp(cmd, "col") == 0)
col_format_flag = 1;
}
// put options in argv[]
if(nrhs > 2)
{
mxGetString(prhs[2], cmd, mxGetN(prhs[2]) + 1);
if((argv[argc] = strtok(cmd, " ")) != NULL)
while((argv[++argc] = strtok(NULL, " ")) != NULL)
;
}
// parse options
for(i=1;i<argc;i++)
{
if(argv[i][0] != '-') break;
++i;
if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C'
&& argv[i-1][1] != 'R') // since options -q and -C have no parameter
return 1;
switch(argv[i-1][1])
{
case 's':
param.solver_type = atoi(argv[i]);
flag_solver_specified = 1;
break;
case 'c':
param.C = atof(argv[i]);
flag_C_specified = 1;
break;
case 'p':
param.p = atof(argv[i]);
flag_p_specified = 1;
break;
case 'n':
param.nu = atof(argv[i]);
break;
case 'e':
param.eps = atof(argv[i]);
break;
case 'B':
bias = atof(argv[i]);
break;
case 'v':
flag_cross_validation = 1;
nr_fold = atoi(argv[i]);
if(nr_fold < 2)
{
mexPrintf("n-fold cross validation: n must >= 2\n");
return 1;
}
break;
case 'w':
++param.nr_weight;
param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight);
param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight);
param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
param.weight[param.nr_weight-1] = atof(argv[i]);
break;
case 'q':
print_func = &print_null;
i--;
break;
case 'C':
flag_find_parameters = 1;
i--;
break;
case 'R':
param.regularize_bias = 0;
i--;
break;
default:
mexPrintf("unknown option\n");
return 1;
}
}
set_print_string_function(print_func);
// default solver for parameter selection is L2R_L2LOSS_SVC
if(flag_find_parameters)
{
if(!flag_cross_validation)
nr_fold = 5;
if(!flag_solver_specified)
{
mexPrintf("Solver not specified. Using -s 2\n");
param.solver_type = L2R_L2LOSS_SVC;
}
else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC && param.solver_type != L2R_L2LOSS_SVR)
{
mexPrintf("Warm-start parameter search only available for -s 0, -s 2 and -s 11\n");
return 1;
}
}
if(param.eps == INF)
{
switch(param.solver_type)
{
case L2R_LR:
case L2R_L2LOSS_SVC:
param.eps = 0.01;
break;
case L2R_L2LOSS_SVR:
param.eps = 0.0001;
break;
case L2R_L2LOSS_SVC_DUAL:
case L2R_L1LOSS_SVC_DUAL:
case MCSVM_CS:
case L2R_LR_DUAL:
param.eps = 0.1;
break;
case L1R_L2LOSS_SVC:
case L1R_LR:
param.eps = 0.01;
break;
case L2R_L1LOSS_SVR_DUAL:
case L2R_L2LOSS_SVR_DUAL:
param.eps = 0.1;
break;
case ONECLASS_SVM:
param.eps = 0.01;
break;
}
}
return 0;
}
// Fill every requested output slot with an empty (0x0) real double matrix,
// so that each of the nlhs outputs is assigned on error/help paths.
static void fake_answer(int nlhs, mxArray *plhs[])
{
	int filled = 0;

	while(filled < nlhs)
	{
		plhs[filled] = mxCreateDoubleMatrix(0, 0, mxREAL);
		++filled;
	}
}
// Convert a MATLAB label vector and sparse instance matrix into the global
// liblinear problem (prob) backed by the global x_space buffer.
//
// label_vec:    dense double column vector, one label per instance.
// instance_mat: sparse double matrix; one instance per row, or one per
//               column when col_format_flag is set.
//
// Returns 0 on success and -1 on failure (transpose error, label/instance
// count mismatch, or allocation failure). On every path prob.x, prob.y and
// x_space are either NULL or heap blocks that the caller must free().
int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat)
{
	mwIndex *ir, *jc, low, high, k;
	// using size_t due to the output type of matlab functions
	size_t i, j, l, elements, max_index, label_vector_row_num;
	mwSize num_samples;
	double *samples, *labels;
	mxArray *instance_mat_col; // instance sparse matrix in column format
	int status = -1;

	prob.x = NULL;
	prob.y = NULL;
	x_space = NULL;
	// set up front so prob.bias is well defined even when loading fails
	prob.bias = bias;

	if(col_format_flag)
		instance_mat_col = (mxArray *)instance_mat;
	else
	{
		// transpose instance matrix
		mxArray *prhs[1], *plhs[1];
		int failed;

		prhs[0] = mxDuplicateArray(instance_mat);
		failed = mexCallMATLAB(1, plhs, 1, prhs, "transpose");
		// the duplicate is only an input to the call; release it either way
		mxDestroyArray(prhs[0]);
		if(failed)
		{
			mexPrintf("Error: cannot transpose training instance matrix\n");
			return -1;
		}
		instance_mat_col = plhs[0];
	}

	// the number of instance
	l = mxGetN(instance_mat_col);
	label_vector_row_num = mxGetM(label_vec);
	prob.l = (int) l;

	if(label_vector_row_num!=l)
		mexPrintf("Length of label vector does not match # of instances.\n");
	else
	{
		// each column is one instance
		labels = mxGetPr(label_vec);
		samples = mxGetPr(instance_mat_col);
		ir = mxGetIr(instance_mat_col);
		jc = mxGetJc(instance_mat_col);

		num_samples = mxGetNzmax(instance_mat_col);

		// per instance: its non-zeros, an optional bias node and a terminator
		elements = num_samples + l*2;
		max_index = mxGetM(instance_mat_col);

		prob.y = Malloc(double, l);
		prob.x = Malloc(struct feature_node*, l);
		x_space = Malloc(struct feature_node, elements);

		// a NULL result is only an error for a non-zero request
		if((l > 0 && (prob.y == NULL || prob.x == NULL)) ||
				(elements > 0 && x_space == NULL))
			mexPrintf("Error: out of memory while reading the training instances\n");
		else
		{
			j = 0;
			for(i=0;i<l;i++)
			{
				prob.x[i] = &x_space[j];
				prob.y[i] = labels[i];
				low = jc[i], high = jc[i+1];
				for(k=low;k<high;k++)
				{
					// MATLAB row indices are 0-based, liblinear's are 1-based
					x_space[j].index = (int) ir[k]+1;
					x_space[j].value = samples[k];
					j++;
				}
				if(prob.bias>=0)
				{
					x_space[j].index = (int) max_index+1;
					x_space[j].value = prob.bias;
					j++;
				}
				// index -1 terminates the instance
				x_space[j++].index = -1;
			}

			if(prob.bias>=0)
				prob.n = (int) max_index+1;
			else
				prob.n = (int) max_index;

			status = 0;
		}
	}

	// The data now lives in x_space; the transposed copy (owned by this
	// function) is no longer needed and would otherwise stay alive during
	// training.
	if(!col_format_flag)
		mxDestroyArray(instance_mat_col);

	return status;
}
// Interface function of matlab
// now assume prhs[0]: label prhs[1]: features
//
// MEX entry point. Expects 2 to 4 inputs (labels, sparse instance matrix,
// optional option string, optional 'col') and at most one output.
// Depending on the options, plhs[0] receives
//   - a 3x1 vector [best_C; best_p; best_score] for -C (parameter search),
//   - a scalar cross-validation result for -v,
//   - otherwise the trained model converted by model_to_matlab_structure().
// On any usage or input error the outputs are filled with empty matrices.
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[] )
{
	const char *error_msg = NULL;
	int err;

	// fix random seed to have same results for each run
	// (for cross validation)
	srand(1);

	// wrong number of outputs or inputs: show usage
	if(nlhs > 1 || nrhs <= 1 || nrhs >= 5)
	{
		exit_with_help();
		fake_answer(nlhs, plhs);
		return;
	}

	// Transform the input Matrix to libsvm format
	if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]))
	{
		mexPrintf("Error: label vector and instance matrix must be double\n");
		fake_answer(nlhs, plhs);
		return;
	}

	if(mxIsSparse(prhs[0]))
	{
		mexPrintf("Error: label vector should not be in sparse format\n");
		fake_answer(nlhs, plhs);
		return;
	}

	if(parse_command_line(nrhs, prhs, NULL))
	{
		exit_with_help();
		destroy_param(&param);
		fake_answer(nlhs, plhs);
		return;
	}

	// The problem has not been read yet on this path, so only param is
	// released here (prob.x / prob.y / x_space are not touched).
	if(!mxIsSparse(prhs[1]))
	{
		mexPrintf("Training_instance_matrix must be sparse; "
			"use sparse(Training_instance_matrix) first\n");
		destroy_param(&param);
		fake_answer(nlhs, plhs);
		return;
	}

	err = read_problem_sparse(prhs[0], prhs[1]);

	// train's original code
	// Validate the parameters only against a successfully loaded problem.
	if(!err)
		error_msg = check_parameter(&prob, &param);

	if(err || error_msg)
	{
		if (error_msg != NULL)
			mexPrintf("Error: %s\n", error_msg);
		fake_answer(nlhs, plhs);
	}
	else if (flag_find_parameters)
	{
		double best_C, best_p, best_score, *ptr;

		do_find_parameters(&best_C, &best_p, &best_score);

		plhs[0] = mxCreateDoubleMatrix(3, 1, mxREAL);
		ptr = mxGetPr(plhs[0]);
		ptr[0] = best_C;
		ptr[1] = best_p;
		ptr[2] = best_score;
	}
	else if(flag_cross_validation)
	{
		double *ptr;

		plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
		ptr = mxGetPr(plhs[0]);
		ptr[0] = do_cross_validation();
	}
	else
	{
		model_ = train(&prob, &param);
		error_msg = model_to_matlab_structure(plhs, model_);
		if(error_msg)
			mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg);
		free_and_destroy_model(&model_);
	}

	// single cleanup point for everything allocated after option parsing
	destroy_param(&param);
	free(prob.y);
	free(prob.x);
	free(x_space);
}