First commit

2025-06-22 00:31:33 +02:00
parent a52c20d1fb
commit 4bdbcad256
110 changed files with 31991 additions and 1 deletion

libsvm-3.36/python/MANIFEST.in

@@ -0,0 +1,2 @@
include cpp-source/*
include cpp-source/*/*

libsvm-3.36/python/Makefile

@@ -0,0 +1,4 @@
all = lib
lib:
make -C .. lib

libsvm-3.36/python/README

@@ -0,0 +1,511 @@
----------------------------------
--- Python interface of LIBSVM ---
----------------------------------
Table of Contents
=================
- Introduction
- Installation via PyPI
- Installation via Sources
- Quick Start
- Quick Start with Scipy
- Design Description
- Data Structures
- Utility Functions
- Additional Information
Introduction
============
Python (http://www.python.org/) is a programming language suitable for rapid
development. This tool provides a simple Python interface to LIBSVM, a library
for support vector machines (http://www.csie.ntu.edu.tw/~cjlin/libsvm). The
interface is very easy to use as the usage is the same as that of LIBSVM. The
interface is developed with the built-in Python library "ctypes."
Installation via PyPI
=====================
To install the interface from PyPI, execute the following command:
> pip install -U libsvm-official
Installation via Sources
========================
Alternatively, you may install the interface from sources by
generating the LIBSVM shared library.
Depending on your use cases, you can choose between local-directory
and system-wide installation.
- Local-directory installation:
On Unix systems, type
> make
This generates a .so file in the LIBSVM main directory and you
can run the interface in the current python directory.
For Windows, the shared library libsvm.dll is ready in the
directory `..\windows' and you can directly run the interface in
the current python directory. You can copy libsvm.dll to the
system directory (e.g., `C:\WINDOWS\system32\') to make it
available system-wide. To regenerate libsvm.dll, please
follow the instructions for building Windows binaries in the
LIBSVM README.
- System-wide installation:
Type
> pip install -e .
or
> pip install --user -e .
The option --user would install the package in the home directory
instead of the system directory, and thus does not require the
root privilege.
Please note that you must keep the sources after the installation.
For Windows, to run the above command, Microsoft Visual C++ and
other tools are needed.
In addition, do NOT use the following commands, which FAIL:
> python setup.py install (fails when run in the python directory)
> pip install .
Quick Start
===========
"Quick Start with Scipy" is in the next section.
There are two levels of usage. The high-level one uses utility
functions in svmutil.py and commonutil.py (shared with LIBLINEAR and
imported by svmutil.py). The usage is the same as the LIBSVM MATLAB
interface.
>>> from libsvm.svmutil import *
# Read data in LIBSVM format
>>> y, x = svm_read_problem('../heart_scale')
>>> m = svm_train(y[:200], x[:200], '-c 4')
>>> p_label, p_acc, p_val = svm_predict(y[200:], x[200:], m)
# Construct problem in python format
# Dense data
>>> y, x = [1,-1], [[1,0,1], [-1,0,-1]]
# Sparse data
>>> y, x = [1,-1], [{1:1, 3:1}, {1:-1,3:-1}]
>>> prob = svm_problem(y, x)
>>> param = svm_parameter('-t 0 -c 4 -b 1')
>>> m = svm_train(prob, param)
# Precomputed kernel data (-t 4)
# Dense data
>>> y, x = [1,-1], [[1, 2, -2], [2, -2, 2]]
# Sparse data
>>> y, x = [1,-1], [{0:1, 1:2, 2:-2}, {0:2, 1:-2, 2:2}]
# isKernel=True must be set for precomputed kernel
>>> prob = svm_problem(y, x, isKernel=True)
>>> param = svm_parameter('-t 4 -c 4 -b 1')
>>> m = svm_train(prob, param)
# For the format of precomputed kernel, please read LIBSVM README.
# Other utility functions
>>> svm_save_model('heart_scale.model', m)
>>> m = svm_load_model('heart_scale.model')
>>> p_label, p_acc, p_val = svm_predict(y, x, m, '-b 1')
>>> ACC, MSE, SCC = evaluations(y, p_label)
# Getting online help
>>> help(svm_train)
Low-level usage directly calls the C interfaces imported by svm.py. Note that
all arguments and return values are in ctypes format, so you need to handle
them carefully.
>>> from libsvm.svm import *
>>> prob = svm_problem([1,-1], [{1:1, 3:1}, {1:-1,3:-1}])
>>> param = svm_parameter('-c 4')
>>> m = libsvm.svm_train(prob, param) # m is a ctype pointer to an svm_model
# Convert a Python-format instance to svm_nodearray, a ctypes structure
>>> x0, max_idx = gen_svm_nodearray({1:1, 3:1})
>>> label = libsvm.svm_predict(m, x0)
Quick Start with Scipy
======================
Make sure you have Scipy installed to proceed in this section.
If numba (http://numba.pydata.org) is installed, some operations will be much faster.
There are two levels of usage. The high-level one uses utility functions
in svmutil.py and the usage is the same as the LIBSVM MATLAB interface.
>>> import numpy as np
>>> import scipy
>>> from libsvm.svmutil import *
# Read data in LIBSVM format
>>> y, x = svm_read_problem('../heart_scale', return_scipy = True) # y: ndarray, x: csr_matrix
>>> m = svm_train(y[:200], x[:200, :], '-c 4')
>>> p_label, p_acc, p_val = svm_predict(y[200:], x[200:, :], m)
# Construct problem in Scipy format
# Dense data: numpy ndarray
>>> y, x = np.asarray([1,-1]), np.asarray([[1,0,1], [-1,0,-1]])
# Sparse data: scipy csr_matrix((data, (row_ind, col_ind)))
>>> y, x = np.asarray([1,-1]), scipy.sparse.csr_matrix(([1, 1, -1, -1], ([0, 0, 1, 1], [0, 2, 0, 2])))
>>> prob = svm_problem(y, x)
>>> param = svm_parameter('-t 0 -c 4 -b 1')
>>> m = svm_train(prob, param)
# Precomputed kernel data (-t 4)
# Dense data: numpy ndarray
>>> y, x = np.asarray([1,-1]), np.asarray([[1,2,-2], [2,-2,2]])
# Sparse data: scipy csr_matrix((data, (row_ind, col_ind)))
>>> y, x = np.asarray([1,-1]), scipy.sparse.csr_matrix(([1, 2, -2, 2, -2, 2], ([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2])))
# isKernel=True must be set for precomputed kernel
>>> prob = svm_problem(y, x, isKernel=True)
>>> param = svm_parameter('-t 4 -c 4 -b 1')
>>> m = svm_train(prob, param)
# For the format of precomputed kernel, please read LIBSVM README.
# Apply data scaling in Scipy format
>>> y, x = svm_read_problem('../heart_scale', return_scipy=True)
>>> scale_param = csr_find_scale_param(x, lower=0)
>>> scaled_x = csr_scale(x, scale_param)
# Other utility functions
>>> svm_save_model('heart_scale.model', m)
>>> m = svm_load_model('heart_scale.model')
>>> p_label, p_acc, p_val = svm_predict(y, x, m, '-b 1')
>>> ACC, MSE, SCC = evaluations(y, p_label)
# Getting online help
>>> help(svm_train)
Low-level usage directly calls the C interfaces imported by svm.py. Note that
all arguments and return values are in ctypes format, so you need to handle
them carefully.
>>> from libsvm.svm import *
>>> prob = svm_problem(np.asarray([1,-1]), scipy.sparse.csr_matrix(([1, 1, -1, -1], ([0, 0, 1, 1], [0, 2, 0, 2]))))
>>> param = svm_parameter('-c 4')
>>> m = libsvm.svm_train(prob, param) # m is a ctype pointer to an svm_model
# Convert a tuple of ndarray (index, data) to feature_nodearray, a ctypes structure
# Note that index starts from 0, though the following example will be changed to 1:1, 3:1 internally
>>> x0, max_idx = gen_svm_nodearray((np.asarray([0,2]), np.asarray([1,1])))
>>> label = libsvm.svm_predict(m, x0)
Design Description
==================
There are two files svm.py and svmutil.py, which respectively correspond to
low-level and high-level use of the interface.
In svm.py, we adopt the Python built-in library "ctypes," so that
Python can directly access C structures and interface functions defined
in svm.h.
While advanced users can use the structures/functions in svm.py directly,
svmutil.py provides easy-to-use wrapper functions that avoid handling
ctypes structures. The usage is similar to the LIBSVM MATLAB interface.
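As a rough illustration of the ctypes pattern used in svm.py (a minimal
sketch, not the shipped code; the real module searches several library
locations and declares the full set of prototypes):
>>> from ctypes import CDLL, c_int, c_double
>>> lib = CDLL('../libsvm.so.4') # path is an assumption; svm.py locates the library itself
>>> lib.svm_get_nr_class.restype = c_int # declare C return types before calling
>>> lib.svm_get_svr_probability.restype = c_double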
Data Structures
===============
Four data structures derived from svm.h are svm_node, svm_problem, svm_parameter,
and svm_model. They all contain fields with the same names as in svm.h. Access
these fields carefully because you are directly using a C structure instead of a
Python object. For svm_model, accessing the fields directly is not recommended.
Programmers should use the interface functions or methods of the svm_model class
in Python to get the values. The following description introduces additional
fields and methods.
Before using the data structures, execute the following command to load the
LIBSVM shared library:
>>> from libsvm.svm import *
- class svm_node:
Construct an svm_node.
>>> node = svm_node(idx, val)
idx: an integer indicating the feature index.
val: a float indicating the feature value.
Show the index and the value of a node.
>>> print(node)
- Function: gen_svm_nodearray(xi [,feature_max=None [,isKernel=False]])
Generate a feature vector from a Python list/tuple/dictionary, numpy ndarray or tuple of (index, data):
>>> xi_ctype, max_idx = gen_svm_nodearray({1:1, 3:1, 5:-2})
xi_ctype: the returned svm_nodearray (a ctypes structure)
max_idx: the maximal feature index of xi
feature_max: if feature_max is assigned, features with indices larger than
feature_max are removed.
isKernel: if isKernel == True, the list index starts from 0 for precomputed
kernel. Otherwise, the list index starts from 1. The default
value is False.
- class svm_problem:
Construct an svm_problem instance
>>> prob = svm_problem(y, x)
y: a Python list/tuple/ndarray of l labels (type must be int/double).
x: 1. a list/tuple of l training instances. Feature vector of
each training instance is a list/tuple or dictionary.
2. an l * n numpy ndarray or scipy spmatrix (n: number of features).
Note that if your x contains sparse data (i.e., dictionary), the internal
ctypes data format is still sparse.
For pre-computed kernel, the isKernel flag should be set to True:
>>> prob = svm_problem(y, x, isKernel=True)
Please read LIBSVM README for more details of pre-computed kernel.
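For instance, a precomputed kernel matrix can be built by prepending the
sample serial numbers 1..l as column 0 (a minimal sketch assuming a linear
kernel and hypothetical data X):
>>> import numpy as np
>>> X = np.asarray([[1.0, 2.0], [0.0, 1.0], [1.0, 0.0]]) # 3 raw instances
>>> K = X @ X.T # kernel values K(i, j)
>>> x = np.hstack([np.arange(1, 4).reshape(-1, 1), K]) # column 0: serial numbers 1..3
>>> prob = svm_problem([1, -1, 1], x, isKernel=True)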
- class svm_parameter:
Construct an svm_parameter instance
>>> param = svm_parameter('training_options')
If 'training_options' is empty, LIBSVM default values are applied.
Set param to LIBSVM default values.
>>> param.set_to_default_values()
Parse a string of options.
>>> param.parse_options('training_options')
Show values of parameters.
>>> print(param)
- class svm_model:
There are two ways to obtain an instance of svm_model:
>>> model = svm_train(y, x)
>>> model = svm_load_model('model_file_name')
Note that the returned structure of interface functions
libsvm.svm_train and libsvm.svm_load_model is a ctypes pointer of
svm_model, which is different from the svm_model object returned
by svm_train and svm_load_model in svmutil.py. We provide a
function toPyModel for the conversion:
>>> model_ptr = libsvm.svm_train(prob, param)
>>> model = toPyModel(model_ptr)
If you obtain a model in a way other than the above approaches,
handle it carefully to avoid memory leaks or segmentation faults.
Some interface functions to access LIBSVM models are wrapped as
members of the class svm_model:
>>> svm_type = model.get_svm_type()
>>> nr_class = model.get_nr_class()
>>> svr_probability = model.get_svr_probability()
>>> class_labels = model.get_labels()
>>> sv_indices = model.get_sv_indices()
>>> nr_sv = model.get_nr_sv()
>>> is_prob_model = model.is_probability_model()
>>> support_vector_coefficients = model.get_sv_coef()
>>> support_vectors = model.get_SV()
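For a binary model with a linear kernel, these accessors suffice to
reconstruct the primal weight vector w (a sketch, assuming such a model m):
>>> w = {}
>>> for (coef,), sv in zip(m.get_sv_coef(), m.get_SV()):
...     for idx, val in sv.items():
...         w[idx] = w.get(idx, 0.0) + coef * val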
Utility Functions
=================
To use utility functions, type
>>> from libsvm.svmutil import *
The above command loads
svm_train() : train an SVM model
svm_predict() : predict testing data
svm_read_problem() : read the data from a LIBSVM-format file or object.
svm_load_model() : load a LIBSVM model.
svm_save_model() : save model to a file.
evaluations() : evaluate prediction results.
csr_find_scale_param() : find scaling parameter for data in csr format.
csr_scale() : apply data scaling to data in csr format.
- Function: svm_train
There are three ways to call svm_train()
>>> model = svm_train(y, x [, 'training_options'])
>>> model = svm_train(prob [, 'training_options'])
>>> model = svm_train(prob, param)
y: a list/tuple/ndarray of l training labels (type must be int/double).
x: 1. a list/tuple of l training instances. Feature vector of
each training instance is a list/tuple or dictionary.
2. an l * n numpy ndarray or scipy spmatrix (n: number of features).
training_options: a string in the same form as that for the LIBSVM
command line.
prob: an svm_problem instance generated by calling
svm_problem(y, x).
For pre-computed kernel, you should use
svm_problem(y, x, isKernel=True)
param: an svm_parameter instance generated by calling
svm_parameter('training_options')
model: the returned svm_model instance. See svm.h for details of this
structure. If '-v' is specified, cross validation is
conducted and the returned model is just a scalar: cross-validation
accuracy for classification and mean-squared error for regression.
To train the same data many times with different
parameters, the second and the third ways should be faster.
Examples:
>>> y, x = svm_read_problem('../heart_scale')
>>> prob = svm_problem(y, x)
>>> param = svm_parameter('-s 3 -c 5 -h 0')
>>> m = svm_train(y, x, '-c 5')
>>> m = svm_train(prob, '-t 2 -c 5')
>>> m = svm_train(prob, param)
>>> CV_ACC = svm_train(y, x, '-v 3')
- Function: svm_predict
To predict testing data with a model, use
>>> p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
y: a list/tuple/ndarray of l true labels (type must be int/double).
It is used for calculating the accuracy. Use [] if true labels are
unavailable.
x: 1. a list/tuple of l testing instances. Feature vector of
each testing instance is a list/tuple or dictionary.
2. an l * n numpy ndarray or scipy spmatrix (n: number of features).
predicting_options: a string of predicting options in the same format as
that of LIBSVM.
model: an svm_model instance.
p_labels: a list of predicted labels
p_acc: a tuple including accuracy (for classification), mean
squared error, and squared correlation coefficient (for
regression).
p_vals: a list of decision values or probability estimates (if '-b 1'
is specified). If k is the number of classes in training data,
for decision values, each element includes results of predicting
k(k-1)/2 binary-class SVMs. For classification, k = 1 is a
special case. Decision value [+1] is returned for each testing
instance, instead of an empty list.
For probabilities, each element contains k values indicating
the probability that the testing instance is in each class.
For one-class SVM, the list has two elements indicating the
probabilities of normal instance/outlier.
Note that the order of classes is the same as the 'model.label'
field in the model structure.
Example:
>>> m = svm_train(y, x, '-c 5')
>>> p_labels, p_acc, p_vals = svm_predict(y, x, m)
- Functions: svm_read_problem/svm_load_model/svm_save_model
See the usage by examples:
>>> y, x = svm_read_problem('data.txt')
>>> with open('data.txt') as f:
...     y, x = svm_read_problem(f)
>>> m = svm_load_model('model_file')
>>> svm_save_model('model_file', m)
- Function: evaluations
Calculate some evaluations using the true values (ty) and the predicted
values (pv):
>>> (ACC, MSE, SCC) = evaluations(ty, pv, useScipy)
ty: a list/tuple/ndarray of true values.
pv: a list/tuple/ndarray of predicted values.
useScipy: convert ty, pv to ndarray, and use scipy functions to do the evaluation
ACC: accuracy.
MSE: mean squared error.
SCC: squared correlation coefficient.
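For example, with ty = [1, 2, 3] and pv = [1, 2, 2]:
>>> ACC, MSE, SCC = evaluations([1, 2, 3], [1, 2, 2])
Here ACC is about 66.67 (two of three values match exactly), MSE is about
0.33, and SCC is 0.75.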
- Function: csr_find_scale_param/csr_scale
Scale data in csr format.
>>> param = csr_find_scale_param(x [, lower=l, upper=u])
>>> x = csr_scale(x, param)
x: a csr_matrix of data.
l: x scaling lower limit; default -1.
u: x scaling upper limit; default 1.
The scaling process is: x * diag(coef) + ones(l, 1) * offset'
param: a dictionary of scaling parameters, where param['coef'] = coef and param['offset'] = offset.
coef: a scipy array of scaling coefficients.
offset: a scipy array of scaling offsets.
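A typical pattern is to compute the scaling parameters on the training data
and reuse them on the test data (a sketch; x_train and x_test stand for
hypothetical csr_matrix data):
>>> param = csr_find_scale_param(x_train, lower=0)
>>> x_train = csr_scale(x_train, param)
>>> x_test = csr_scale(x_test, param)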
Additional Information
======================
This interface was originally written by Hsiang-Fu Yu from the Department of
Computer Science, National Taiwan University. If you find this tool useful,
please cite LIBSVM as follows:
Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support
vector machines. ACM Transactions on Intelligent Systems and
Technology, 2:27:1--27:27, 2011. Software available at
http://www.csie.ntu.edu.tw/~cjlin/libsvm
For any questions, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>,
or check the FAQ page:
http://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html

libsvm-3.36/python/libsvm/commonutil.py

@@ -0,0 +1,189 @@
from __future__ import print_function
from array import array
import sys
try:
import numpy as np
import scipy
from scipy import sparse
except ImportError:
scipy = None
__all__ = ['svm_read_problem', 'evaluations', 'csr_find_scale_param', 'csr_scale']
def svm_read_problem(data_source, return_scipy=False):
"""
svm_read_problem(data_source, return_scipy=False) -> [y, x], y: list, x: list of dictionary
svm_read_problem(data_source, return_scipy=True) -> [y, x], y: ndarray, x: csr_matrix
Read LIBSVM-format data from data_source and return labels y
and data instances x.
"""
if scipy != None and return_scipy:
prob_y = array('d')
prob_x = array('d')
row_ptr = array('l', [0])
col_idx = array('l')
else:
prob_y = []
prob_x = []
row_ptr = [0]
col_idx = []
indx_start = 1
if hasattr(data_source, "read"):
file = data_source
else:
file = open(data_source)
try:
for line in file:
line = line.split(None, 1)
# In case of an instance with all-zero features
if len(line) == 1: line += ['']
label, features = line
prob_y.append(float(label))
if scipy != None and return_scipy:
nz = 0
for e in features.split():
ind, val = e.split(":")
if ind == '0':
indx_start = 0
val = float(val)
if val != 0:
col_idx.append(int(ind)-indx_start)
prob_x.append(val)
nz += 1
row_ptr.append(row_ptr[-1]+nz)
else:
xi = {}
for e in features.split():
ind, val = e.split(":")
xi[int(ind)] = float(val)
prob_x += [xi]
finally:
if not hasattr(data_source, "read"):
# close file only if it was created by us
file.close()
if scipy != None and return_scipy:
prob_y = np.frombuffer(prob_y, dtype='d')
prob_x = np.frombuffer(prob_x, dtype='d')
col_idx = np.frombuffer(col_idx, dtype='l')
row_ptr = np.frombuffer(row_ptr, dtype='l')
prob_x = sparse.csr_matrix((prob_x, col_idx, row_ptr))
return (prob_y, prob_x)
def evaluations_scipy(ty, pv):
"""
evaluations_scipy(ty, pv) -> (ACC, MSE, SCC)
ty, pv: ndarray
Calculate accuracy, mean squared error and squared correlation coefficient
using the true values (ty) and predicted values (pv).
"""
if not (scipy != None and isinstance(ty, np.ndarray) and isinstance(pv, np.ndarray)):
raise TypeError("type of ty and pv must be ndarray")
if len(ty) != len(pv):
raise ValueError("len(ty) must be equal to len(pv)")
ACC = 100.0*(ty == pv).mean()
MSE = ((ty - pv)**2).mean()
l = len(ty)
sumv = pv.sum()
sumy = ty.sum()
sumvy = (pv*ty).sum()
sumvv = (pv*pv).sum()
sumyy = (ty*ty).sum()
with np.errstate(all = 'raise'):
try:
SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
except FloatingPointError:
SCC = float('nan')
return (float(ACC), float(MSE), float(SCC))
def evaluations(ty, pv, useScipy = True):
"""
evaluations(ty, pv, useScipy) -> (ACC, MSE, SCC)
ty, pv: list, tuple or ndarray
useScipy: convert ty, pv to ndarray, and use scipy functions for the evaluation
Calculate accuracy, mean squared error and squared correlation coefficient
using the true values (ty) and predicted values (pv).
"""
if scipy != None and useScipy:
return evaluations_scipy(np.asarray(ty), np.asarray(pv))
if len(ty) != len(pv):
raise ValueError("len(ty) must be equal to len(pv)")
total_correct = total_error = 0
sumv = sumy = sumvv = sumyy = sumvy = 0
for v, y in zip(pv, ty):
if y == v:
total_correct += 1
total_error += (v-y)*(v-y)
sumv += v
sumy += y
sumvv += v*v
sumyy += y*y
sumvy += v*y
l = len(ty)
ACC = 100.0*total_correct/l
MSE = total_error/l
try:
SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
except ZeroDivisionError:
SCC = float('nan')
return (float(ACC), float(MSE), float(SCC))
def csr_find_scale_param(x, lower=-1, upper=1):
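"""
csr_find_scale_param(x, lower=-1, upper=1) -> {'coef': ndarray, 'offset': ndarray}
Find per-feature scaling parameters that map each column of the
csr_matrix x into the range [lower, upper].
"""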
assert isinstance(x, sparse.csr_matrix)
assert lower < upper
l, n = x.shape
feat_min = x.min(axis=0).toarray().flatten()
feat_max = x.max(axis=0).toarray().flatten()
coef = (feat_max - feat_min) / (upper - lower)
coef[coef != 0] = 1.0 / coef[coef != 0]
# (x - ones(l,1) * feat_min') * diag(coef) + lower
# = x * diag(coef) - ones(l, 1) * (feat_min' * diag(coef)) + lower
# = x * diag(coef) + ones(l, 1) * (-feat_min' * diag(coef) + lower)
# = x * diag(coef) + ones(l, 1) * offset'
offset = -feat_min * coef + lower
offset[coef == 0] = 0
if sum(offset != 0) * l > 3 * x.getnnz():
print(
"WARNING: The #nonzeros of the scaled data is at least 2 times larger than the original one.\n"
"If feature values are non-negative and sparse, set lower=0 rather than the default lower=-1.",
file=sys.stderr)
return {'coef':coef, 'offset':offset}
def csr_scale(x, scale_param):
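"""
csr_scale(x, scale_param) -> csr_matrix
Apply the scaling parameters returned by csr_find_scale_param to the
csr_matrix x.
"""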
assert isinstance(x, sparse.csr_matrix)
offset = scale_param['offset']
coef = scale_param['coef']
assert len(coef) == len(offset)
l, n = x.shape
if not n == len(coef):
print("WARNING: The dimension of scaling parameters and feature number do not match.", file=sys.stderr)
coef = np.pad(coef, (0, max(0, n - len(coef))))[:n] # zeros padded if n > len(coef)
offset = np.pad(offset, (0, max(0, n - len(offset))))[:n]
# scaled_x = x * diag(coef) + ones(l, 1) * offset'
offset = sparse.csr_matrix(offset.reshape(1, n))
offset = sparse.vstack([offset] * l, format='csr', dtype=x.dtype)
scaled_x = x.dot(sparse.diags(coef, 0, shape=(n, n))) + offset
if scaled_x.getnnz() > x.getnnz():
print(
"WARNING: original #nonzeros %d\n" % x.getnnz() +
" > new #nonzeros %d\n" % scaled_x.getnnz() +
"If feature values are non-negative and sparse, get scale_param by setting lower=0 rather than the default lower=-1.",
file=sys.stderr)
return scaled_x

libsvm-3.36/python/libsvm/svm.py

@@ -0,0 +1,465 @@
from ctypes import *
from ctypes.util import find_library
from os import path
from glob import glob
from enum import IntEnum
import sys
try:
import numpy as np
import scipy
from scipy import sparse
except ImportError:
scipy = None
if sys.version_info[0] < 3:
range = xrange
from itertools import izip as zip
__all__ = ['libsvm', 'svm_problem', 'svm_parameter',
'toPyModel', 'gen_svm_nodearray', 'print_null', 'svm_node', 'svm_forms',
'PRINT_STRING_FUN', 'kernel_names', 'c_double', 'svm_model']
try:
dirname = path.dirname(path.abspath(__file__))
dynamic_lib_name = 'clib.cp*'
path_to_so = glob(path.join(dirname, dynamic_lib_name))[0]
libsvm = CDLL(path_to_so)
except:
try:
if sys.platform == 'win32':
libsvm = CDLL(path.join(dirname, r'..\..\windows\libsvm.dll'))
else:
libsvm = CDLL(path.join(dirname, '../../libsvm.so.4'))
except:
# For unix the prefix 'lib' is not considered.
if find_library('svm'):
libsvm = CDLL(find_library('svm'))
elif find_library('libsvm'):
libsvm = CDLL(find_library('libsvm'))
else:
raise Exception('LIBSVM library not found.')
class svm_forms(IntEnum):
C_SVC = 0
NU_SVC = 1
ONE_CLASS = 2
EPSILON_SVR = 3
NU_SVR = 4
class kernel_names(IntEnum):
LINEAR = 0
POLY = 1
RBF = 2
SIGMOID = 3
PRECOMPUTED = 4
PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
def print_null(s):
return
# In multi-threading, all threads share the same memory space of
# the dynamic library (libsvm). Thus, we use a module-level
# variable to keep a reference to ctypes print_null, preventing
# python from garbage collecting it in thread B while thread A
# still needs it. Check the usage of svm_set_print_string_function()
# in LIBSVM README for details.
ctypes_print_null = PRINT_STRING_FUN(print_null)
def genFields(names, types):
return list(zip(names, types))
def fillprototype(f, restype, argtypes):
f.restype = restype
f.argtypes = argtypes
class svm_node(Structure):
_names = ["index", "value"]
_types = [c_int, c_double]
_fields_ = genFields(_names, _types)
def __init__(self, index=-1, value=0):
self.index, self.value = index, value
def __str__(self):
return '%d:%g' % (self.index, self.value)
def gen_svm_nodearray(xi, feature_max=None, isKernel=False):
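"""
gen_svm_nodearray(xi, feature_max=None, isKernel=False) -> (svm_node array, max_idx)
Convert one instance (dict, list/tuple, 1-d ndarray, or (index, data)
tuple of ndarrays) into the svm_node array expected by the C library,
terminated by a node with index -1.
"""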
if feature_max:
assert(isinstance(feature_max, int))
xi_shift = 0 # ensure correct indices of xi
if scipy and isinstance(xi, tuple) and len(xi) == 2\
and isinstance(xi[0], np.ndarray) and isinstance(xi[1], np.ndarray): # for a sparse vector
if not isKernel:
index_range = xi[0] + 1 # index starts from 1
else:
index_range = xi[0] # index starts from 0 for precomputed kernel
if feature_max:
index_range = index_range[np.where(index_range <= feature_max)]
elif scipy and isinstance(xi, np.ndarray):
if not isKernel:
xi_shift = 1
index_range = xi.nonzero()[0] + 1 # index starts from 1
else:
index_range = np.arange(0, len(xi)) # index starts from 0 for precomputed kernel
if feature_max:
index_range = index_range[np.where(index_range <= feature_max)]
elif isinstance(xi, (dict, list, tuple)):
if isinstance(xi, dict):
index_range = sorted(xi.keys())
elif isinstance(xi, (list, tuple)):
if not isKernel:
xi_shift = 1
index_range = range(1, len(xi) + 1) # index starts from 1
else:
index_range = range(0, len(xi)) # index starts from 0 for precomputed kernel
if feature_max:
index_range = list(filter(lambda j: j <= feature_max, index_range))
if not isKernel:
index_range = list(filter(lambda j:xi[j-xi_shift] != 0, index_range))
else:
raise TypeError('xi should be a dictionary, list, tuple, 1-d numpy array, or tuple of (index, data)')
ret = (svm_node*(len(index_range)+1))()
ret[-1].index = -1
if scipy and isinstance(xi, tuple) and len(xi) == 2\
and isinstance(xi[0], np.ndarray) and isinstance(xi[1], np.ndarray): # for a sparse vector
# since xi=(indices, values), we must sort them simultaneously.
for idx, arg in enumerate(np.argsort(index_range)):
ret[idx].index = index_range[arg]
ret[idx].value = (xi[1])[arg]
else:
for idx, j in enumerate(index_range):
ret[idx].index = j
ret[idx].value = xi[j - xi_shift]
max_idx = 0
if len(index_range) > 0:
max_idx = index_range[-1]
return ret, max_idx
try:
from numba import jit
jit_enabled = True
except:
# We need to support two cases: when jit is called with no arguments, and when jit is called with
# a keyword argument.
def jit(func=None, *args, **kwargs):
if func is None:
# This handles the case where jit is used with parentheses: @jit(nopython=True)
return lambda x: x
else:
# This handles the case where jit is used without parentheses: @jit
return func
jit_enabled = False
@jit(nopython=True)
def csr_to_problem_jit(l, x_val, x_ind, x_rowptr, prob_val, prob_ind, prob_rowptr, indx_start):
for i in range(l):
b1,e1 = x_rowptr[i], x_rowptr[i+1]
b2,e2 = prob_rowptr[i], prob_rowptr[i+1]-1
for j in range(b1,e1):
prob_ind[j-b1+b2] = x_ind[j]+indx_start
prob_val[j-b1+b2] = x_val[j]
def csr_to_problem_nojit(l, x_val, x_ind, x_rowptr, prob_val, prob_ind, prob_rowptr, indx_start):
for i in range(l):
x_slice = slice(x_rowptr[i], x_rowptr[i+1])
prob_slice = slice(prob_rowptr[i], prob_rowptr[i+1]-1)
prob_ind[prob_slice] = x_ind[x_slice]+indx_start
prob_val[prob_slice] = x_val[x_slice]
def csr_to_problem(x, prob, isKernel):
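"""
csr_to_problem(x, prob, isKernel) -> None
Flatten the csr_matrix x into prob.x_space, one svm_node stream with a
-1-index terminator after each row, and record per-row offsets in
prob.rowptr for the later pointer arithmetic in svm_problem.
"""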
if not x.has_sorted_indices:
x.sort_indices()
# Extra space for termination node and (possibly) bias term
x_space = prob.x_space = np.empty((x.nnz+x.shape[0]), dtype=svm_node)
# rowptr has to be a 64bit integer because it will later be used for pointer arithmetic,
# which overflows when the added pointer points to an address that is numerically high.
prob.rowptr = x.indptr.astype(np.int64, copy=True)
prob.rowptr[1:] += np.arange(1,x.shape[0]+1)
prob_ind = x_space["index"]
prob_val = x_space["value"]
prob_ind[:] = -1
if not isKernel:
indx_start = 1 # index starts from 1
else:
indx_start = 0 # index starts from 0 for precomputed kernel
if jit_enabled:
csr_to_problem_jit(x.shape[0], x.data, x.indices, x.indptr, prob_val, prob_ind, prob.rowptr, indx_start)
else:
csr_to_problem_nojit(x.shape[0], x.data, x.indices, x.indptr, prob_val, prob_ind, prob.rowptr, indx_start)
class svm_problem(Structure):
_names = ["l", "y", "x"]
_types = [c_int, POINTER(c_double), POINTER(POINTER(svm_node))]
_fields_ = genFields(_names, _types)
def __init__(self, y, x, isKernel=False):
if (not isinstance(y, (list, tuple))) and (not (scipy and isinstance(y, np.ndarray))):
raise TypeError("type of y: {0} is not supported!".format(type(y)))
if isinstance(x, (list, tuple)):
if len(y) != len(x):
raise ValueError("len(y) != len(x)")
elif scipy != None and isinstance(x, (np.ndarray, sparse.spmatrix)):
if len(y) != x.shape[0]:
raise ValueError("len(y) != len(x)")
if isinstance(x, np.ndarray):
x = np.ascontiguousarray(x) # enforce row-major
if isinstance(x, sparse.spmatrix):
x = x.tocsr()
else:
raise TypeError("type of x: {0} is not supported!".format(type(x)))
self.l = l = len(y)
max_idx = 0
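# keep Python-side references to the node arrays so the memory shared
# with the C library is not garbage collected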
x_space = self.x_space = []
if scipy != None and isinstance(x, sparse.csr_matrix):
csr_to_problem(x, self, isKernel)
max_idx = x.shape[1]
else:
for i, xi in enumerate(x):
tmp_xi, tmp_idx = gen_svm_nodearray(xi,isKernel=isKernel)
x_space += [tmp_xi]
max_idx = max(max_idx, tmp_idx)
self.n = max_idx
self.y = (c_double * l)()
if scipy != None and isinstance(y, np.ndarray):
np.ctypeslib.as_array(self.y, (self.l,))[:] = y
else:
for i, yi in enumerate(y): self.y[i] = yi
self.x = (POINTER(svm_node) * l)()
if scipy != None and isinstance(x, sparse.csr_matrix):
base = addressof(self.x_space.ctypes.data_as(POINTER(svm_node))[0])
x_ptr = cast(self.x, POINTER(c_uint64))
x_ptr = np.ctypeslib.as_array(x_ptr,(self.l,))
x_ptr[:] = self.rowptr[:-1]*sizeof(svm_node)+base
else:
for i, xi in enumerate(self.x_space): self.x[i] = xi
class svm_parameter(Structure):
_names = ["svm_type", "kernel_type", "degree", "gamma", "coef0",
"cache_size", "eps", "C", "nr_weight", "weight_label", "weight",
"nu", "p", "shrinking", "probability"]
_types = [c_int, c_int, c_int, c_double, c_double,
c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double),
c_double, c_double, c_int, c_int]
_fields_ = genFields(_names, _types)
def __init__(self, options = None):
if options is None:
options = ''
self.parse_options(options)
def __str__(self):
s = ''
attrs = svm_parameter._names + list(self.__dict__.keys())
values = map(lambda attr: getattr(self, attr), attrs)
for attr, val in zip(attrs, values):
s += (' %s: %s\n' % (attr, val))
s = s.strip()
return s
def set_to_default_values(self):
self.svm_type = svm_forms.C_SVC
self.kernel_type = kernel_names.RBF
self.degree = 3
self.gamma = 0
self.coef0 = 0
self.nu = 0.5
self.cache_size = 100
self.C = 1
self.eps = 0.001
self.p = 0.1
self.shrinking = 1
self.probability = 0
self.nr_weight = 0
self.weight_label = None
self.weight = None
self.cross_validation = False
self.nr_fold = 0
self.print_func = cast(None, PRINT_STRING_FUN)
def parse_options(self, options):
if isinstance(options, list):
argv = options
elif isinstance(options, str):
argv = options.split()
else:
raise TypeError("arg 1 should be a list or a str.")
self.set_to_default_values()
self.print_func = cast(None, PRINT_STRING_FUN)
weight_label = []
weight = []
i = 0
while i < len(argv):
if argv[i] == "-s":
i = i + 1
self.svm_type = svm_forms(int(argv[i]))
elif argv[i] == "-t":
i = i + 1
self.kernel_type = kernel_names(int(argv[i]))
elif argv[i] == "-d":
i = i + 1
self.degree = int(argv[i])
elif argv[i] == "-g":
i = i + 1
self.gamma = float(argv[i])
elif argv[i] == "-r":
i = i + 1
self.coef0 = float(argv[i])
elif argv[i] == "-n":
i = i + 1
self.nu = float(argv[i])
elif argv[i] == "-m":
i = i + 1
self.cache_size = float(argv[i])
elif argv[i] == "-c":
i = i + 1
self.C = float(argv[i])
elif argv[i] == "-e":
i = i + 1
self.eps = float(argv[i])
elif argv[i] == "-p":
i = i + 1
self.p = float(argv[i])
elif argv[i] == "-h":
i = i + 1
self.shrinking = int(argv[i])
elif argv[i] == "-b":
i = i + 1
self.probability = int(argv[i])
elif argv[i] == "-q":
self.print_func = ctypes_print_null
elif argv[i] == "-v":
i = i + 1
self.cross_validation = 1
self.nr_fold = int(argv[i])
if self.nr_fold < 2:
raise ValueError("n-fold cross validation: n must >= 2")
elif argv[i].startswith("-w"):
i = i + 1
self.nr_weight += 1
weight_label += [int(argv[i-1][2:])]
weight += [float(argv[i])]
else:
raise ValueError("Wrong options")
i += 1
libsvm.svm_set_print_string_function(self.print_func)
self.weight_label = (c_int*self.nr_weight)()
self.weight = (c_double*self.nr_weight)()
for i in range(self.nr_weight):
self.weight[i] = weight[i]
self.weight_label[i] = weight_label[i]
class svm_model(Structure):
_names = ['param', 'nr_class', 'l', 'SV', 'sv_coef', 'rho',
'probA', 'probB', 'prob_density_marks', 'sv_indices',
'label', 'nSV', 'free_sv']
_types = [svm_parameter, c_int, c_int, POINTER(POINTER(svm_node)),
POINTER(POINTER(c_double)), POINTER(c_double),
POINTER(c_double), POINTER(c_double), POINTER(c_double),
POINTER(c_int), POINTER(c_int), POINTER(c_int), c_int]
_fields_ = genFields(_names, _types)
def __init__(self):
self.__createfrom__ = 'python'
def __del__(self):
# free memory created by C to avoid memory leak
if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C':
libsvm.svm_free_and_destroy_model(pointer(pointer(self)))
def get_svm_type(self):
return libsvm.svm_get_svm_type(self)
def get_nr_class(self):
return libsvm.svm_get_nr_class(self)
def get_svr_probability(self):
return libsvm.svm_get_svr_probability(self)
def get_labels(self):
nr_class = self.get_nr_class()
labels = (c_int * nr_class)()
libsvm.svm_get_labels(self, labels)
return labels[:nr_class]
def get_sv_indices(self):
total_sv = self.get_nr_sv()
sv_indices = (c_int * total_sv)()
libsvm.svm_get_sv_indices(self, sv_indices)
return sv_indices[:total_sv]
def get_nr_sv(self):
return libsvm.svm_get_nr_sv(self)
def is_probability_model(self):
return (libsvm.svm_check_probability_model(self) == 1)
def get_sv_coef(self):
return [tuple(self.sv_coef[j][i] for j in range(self.nr_class - 1))
for i in range(self.l)]
def get_SV(self):
result = []
for sparse_sv in self.SV[:self.l]:
row = dict()
i = 0
while True:
if sparse_sv[i].index == -1:
break
row[sparse_sv[i].index] = sparse_sv[i].value
i += 1
result.append(row)
return result
def toPyModel(model_ptr):
"""
toPyModel(model_ptr) -> svm_model
Convert a ctypes POINTER(svm_model) to a Python svm_model
"""
if not model_ptr:
raise ValueError("Null pointer")
m = model_ptr.contents
m.__createfrom__ = 'C'
return m
fillprototype(libsvm.svm_train, POINTER(svm_model), [POINTER(svm_problem), POINTER(svm_parameter)])
fillprototype(libsvm.svm_cross_validation, None, [POINTER(svm_problem), POINTER(svm_parameter), c_int, POINTER(c_double)])
fillprototype(libsvm.svm_save_model, c_int, [c_char_p, POINTER(svm_model)])
fillprototype(libsvm.svm_load_model, POINTER(svm_model), [c_char_p])
fillprototype(libsvm.svm_get_svm_type, c_int, [POINTER(svm_model)])
fillprototype(libsvm.svm_get_nr_class, c_int, [POINTER(svm_model)])
fillprototype(libsvm.svm_get_labels, None, [POINTER(svm_model), POINTER(c_int)])
fillprototype(libsvm.svm_get_sv_indices, None, [POINTER(svm_model), POINTER(c_int)])
fillprototype(libsvm.svm_get_nr_sv, c_int, [POINTER(svm_model)])
fillprototype(libsvm.svm_get_svr_probability, c_double, [POINTER(svm_model)])
fillprototype(libsvm.svm_predict_values, c_double, [POINTER(svm_model), POINTER(svm_node), POINTER(c_double)])
fillprototype(libsvm.svm_predict, c_double, [POINTER(svm_model), POINTER(svm_node)])
fillprototype(libsvm.svm_predict_probability, c_double, [POINTER(svm_model), POINTER(svm_node), POINTER(c_double)])
fillprototype(libsvm.svm_free_model_content, None, [POINTER(svm_model)])
fillprototype(libsvm.svm_free_and_destroy_model, None, [POINTER(POINTER(svm_model))])
fillprototype(libsvm.svm_destroy_param, None, [POINTER(svm_parameter)])
fillprototype(libsvm.svm_check_parameter, c_char_p, [POINTER(svm_problem), POINTER(svm_parameter)])
fillprototype(libsvm.svm_check_probability_model, c_int, [POINTER(svm_model)])
fillprototype(libsvm.svm_set_print_string_function, None, [PRINT_STRING_FUN])

libsvm-3.36/python/libsvm/svmutil.py

@@ -0,0 +1,263 @@
import os, sys
from .svm import *
from .svm import __all__ as svm_all
from .commonutil import *
from .commonutil import __all__ as common_all
try:
import numpy as np
import scipy
from scipy import sparse
except ImportError:
scipy = None
if sys.version_info[0] < 3:
range = xrange
from itertools import izip as zip
_cstr = lambda s: s.encode("utf-8") if isinstance(s,unicode) else str(s)
else:
_cstr = lambda s: bytes(s, "utf-8")
__all__ = ['svm_load_model', 'svm_predict', 'svm_save_model', 'svm_train'] + svm_all + common_all
def svm_load_model(model_file_name):
"""
svm_load_model(model_file_name) -> model
Load a LIBSVM model from model_file_name and return.
"""
model = libsvm.svm_load_model(_cstr(model_file_name))
if not model:
print("can't open model file %s" % model_file_name)
return None
model = toPyModel(model)
return model
def svm_save_model(model_file_name, model):
"""
svm_save_model(model_file_name, model) -> None
Save a LIBSVM model to the file model_file_name.
"""
libsvm.svm_save_model(_cstr(model_file_name), model)
def svm_train(arg1, arg2=None, arg3=None):
"""
svm_train(y, x [, options]) -> model | ACC | MSE
y: a list/tuple/ndarray of l true labels (type must be int/double).
x: 1. a list/tuple of l training instances. Feature vector of
each training instance is a list/tuple or dictionary.
2. an l * n numpy ndarray or scipy spmatrix (n: number of features).
svm_train(prob [, options]) -> model | ACC | MSE
svm_train(prob, param) -> model | ACC| MSE
Train an SVM model from data (y, x) or an svm_problem prob using
'options' or an svm_parameter param.
If '-v' is specified in 'options' (i.e., cross validation)
either accuracy (ACC) or mean-squared error (MSE) is returned.
options:
-s svm_type : set type of SVM (default 0)
0 -- C-SVC (multi-class classification)
1 -- nu-SVC (multi-class classification)
2 -- one-class SVM
3 -- epsilon-SVR (regression)
4 -- nu-SVR (regression)
-t kernel_type : set type of kernel function (default 2)
0 -- linear: u'*v
1 -- polynomial: (gamma*u'*v + coef0)^degree
2 -- radial basis function: exp(-gamma*|u-v|^2)
3 -- sigmoid: tanh(gamma*u'*v + coef0)
4 -- precomputed kernel (kernel values in training_set_file)
-d degree : set degree in kernel function (default 3)
-g gamma : set gamma in kernel function (default 1/num_features)
-r coef0 : set coef0 in kernel function (default 0)
-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
-m cachesize : set cache memory size in MB (default 100)
-e epsilon : set tolerance of termination criterion (default 0.001)
-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
-b probability_estimates : whether to train a model for probability estimates, 0 or 1 (default 0)
-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
-v n: n-fold cross validation mode
-q : quiet mode (no outputs)
"""
prob, param = None, None
if isinstance(arg1, (list, tuple)) or (scipy and isinstance(arg1, np.ndarray)):
assert isinstance(arg2, (list, tuple)) or (scipy and isinstance(arg2, (np.ndarray, sparse.spmatrix)))
y, x, options = arg1, arg2, arg3
param = svm_parameter(options)
prob = svm_problem(y, x, isKernel=(param.kernel_type == kernel_names.PRECOMPUTED))
elif isinstance(arg1, svm_problem):
prob = arg1
if isinstance(arg2, svm_parameter):
param = arg2
else:
param = svm_parameter(arg2)
if prob is None or param is None:
raise TypeError("Wrong types for the arguments")
if param.kernel_type == kernel_names.PRECOMPUTED:
for i in range(prob.l):
xi = prob.x[i]
idx, val = xi[0].index, xi[0].value
if idx != 0:
raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
if val <= 0 or val > prob.n:
raise ValueError('Wrong input format: sample_serial_number out of range')
if param.gamma == 0 and prob.n > 0:
param.gamma = 1.0 / prob.n
libsvm.svm_set_print_string_function(param.print_func)
err_msg = libsvm.svm_check_parameter(prob, param)
if err_msg:
raise ValueError('Error: %s' % err_msg)
if param.cross_validation:
l, nr_fold = prob.l, param.nr_fold
target = (c_double * l)()
libsvm.svm_cross_validation(prob, param, nr_fold, target)
ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
if param.svm_type in [svm_forms.EPSILON_SVR, svm_forms.NU_SVR]:
print("Cross Validation Mean squared error = %g" % MSE)
print("Cross Validation Squared correlation coefficient = %g" % SCC)
return MSE
else:
print("Cross Validation Accuracy = %g%%" % ACC)
return ACC
else:
m = libsvm.svm_train(prob, param)
m = toPyModel(m)
# Keep a reference to prob's node arrays so the SVs pointed to by m
# remain valid even after prob is destroyed.
m.x_space = prob.x_space
return m
def svm_predict(y, x, m, options=""):
"""
svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)
y: a list/tuple/ndarray of l true labels (type must be int/double).
It is used for calculating the accuracy. Use [] if true labels are
unavailable.
x: 1. a list/tuple of l testing instances. Feature vector of
each testing instance is a list/tuple or dictionary.
2. an l * n numpy ndarray or scipy spmatrix (n: number of features).
Predict data (y, x) with the SVM model m.
options:
-b probability_estimates: whether to predict probability estimates,
0 or 1 (default 0).
-q : quiet mode (no outputs).
The return tuple contains
p_labels: a list of predicted labels
p_acc: a tuple including accuracy (for classification), mean-squared
error, and squared correlation coefficient (for regression).
p_vals: a list of decision values or probability estimates (if '-b 1'
is specified). If k is the number of classes, for decision values,
each element includes results of predicting k(k-1)/2 binary-class
SVMs. For probabilities, each element contains k values indicating
the probability that the testing instance is in each class.
Note that the order of classes here is the same as 'model.label'
field in the model structure.
"""
def info(s):
print(s)
if scipy and isinstance(x, np.ndarray):
x = np.ascontiguousarray(x) # enforce row-major
elif scipy and isinstance(x, sparse.spmatrix):
x = x.tocsr()
elif not isinstance(x, (list, tuple)):
raise TypeError("type of x: {0} is not supported!".format(type(x)))
if (not isinstance(y, (list, tuple))) and (not (scipy and isinstance(y, np.ndarray))):
raise TypeError("type of y: {0} is not supported!".format(type(y)))
predict_probability = 0
argv = options.split()
i = 0
while i < len(argv):
if argv[i] == '-b':
i += 1
predict_probability = int(argv[i])
elif argv[i] == '-q':
info = print_null
else:
raise ValueError("Wrong options")
i+=1
svm_type = m.get_svm_type()
is_prob_model = m.is_probability_model()
nr_class = m.get_nr_class()
pred_labels = []
pred_values = []
if scipy and isinstance(x, sparse.spmatrix):
nr_instance = x.shape[0]
else:
nr_instance = len(x)
if predict_probability:
if not is_prob_model:
raise ValueError("Model does not support probabiliy estimates")
if svm_type in [svm_forms.NU_SVR, svm_forms.EPSILON_SVR]:
info("Prob. model for test data: target value = predicted value + z,\n"
"z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability());
nr_class = 0
prob_estimates = (c_double * nr_class)()
for i in range(nr_instance):
if scipy and isinstance(x, sparse.spmatrix):
indslice = slice(x.indptr[i], x.indptr[i+1])
xi, idx = gen_svm_nodearray((x.indices[indslice], x.data[indslice]), isKernel=(m.param.kernel_type == kernel_names.PRECOMPUTED))
else:
xi, idx = gen_svm_nodearray(x[i], isKernel=(m.param.kernel_type == kernel_names.PRECOMPUTED))
label = libsvm.svm_predict_probability(m, xi, prob_estimates)
values = prob_estimates[:nr_class]
pred_labels += [label]
pred_values += [values]
else:
if is_prob_model:
info("Model supports probability estimates, but disabled in predicton.")
if svm_type in [svm_forms.ONE_CLASS, svm_forms.EPSILON_SVR, svm_forms.NU_SVR]:
nr_classifier = 1
else:
nr_classifier = nr_class*(nr_class-1)//2
dec_values = (c_double * nr_classifier)()
for i in range(nr_instance):
if scipy and isinstance(x, sparse.spmatrix):
indslice = slice(x.indptr[i], x.indptr[i+1])
xi, idx = gen_svm_nodearray((x.indices[indslice], x.data[indslice]), isKernel=(m.param.kernel_type == kernel_names.PRECOMPUTED))
else:
xi, idx = gen_svm_nodearray(x[i], isKernel=(m.param.kernel_type == kernel_names.PRECOMPUTED))
label = libsvm.svm_predict_values(m, xi, dec_values)
if nr_class == 1:
values = [1]
else:
values = dec_values[:nr_classifier]
pred_labels += [label]
pred_values += [values]
if len(y) == 0:
y = [0] * nr_instance
ACC, MSE, SCC = evaluations(y, pred_labels)
if svm_type in [svm_forms.EPSILON_SVR, svm_forms.NU_SVR]:
info("Mean squared error = %g (regression)" % MSE)
info("Squared correlation coefficient = %g (regression)" % SCC)
else:
info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(round(nr_instance*ACC/100)), nr_instance))
return pred_labels, (ACC, MSE, SCC), pred_values

libsvm-3.36/python/setup.py

@@ -0,0 +1,123 @@
#!/usr/bin/env python
import sys, os
from os import path
from shutil import copyfile, rmtree
from glob import glob
from setuptools import setup, Extension
from distutils.command.clean import clean as clean_cmd
# a technique to build a shared library on windows
from distutils.command.build_ext import build_ext
build_ext.get_export_symbols = lambda x, y: []
PACKAGE_DIR = "libsvm"
PACKAGE_NAME = "libsvm-official"
VERSION = "3.36.0"
cpp_dir = "cpp-source"
# should be consistent with dynamic_lib_name in libsvm/svm.py
dynamic_lib_name = "clib"
# sources to be included to build the shared library
source_codes = [
"svm.cpp",
]
headers = [
"svm.h",
"svm.def",
]
# license parameters
license_source = path.join("..", "COPYRIGHT")
license_file = "LICENSE"
license_name = "BSD-3-Clause"
kwargs_for_extension = {
"sources": [path.join(cpp_dir, f) for f in source_codes],
"depends": [path.join(cpp_dir, f) for f in headers],
"include_dirs": [cpp_dir],
"language": "c++",
}
# see ../Makefile.win and enable openmp
if sys.platform == "win32":
kwargs_for_extension.update(
{
"define_macros": [("_WIN64", ""), ("_CRT_SECURE_NO_DEPRECATE", "")],
"extra_link_args": [r"-DEF:{}\svm.def".format(cpp_dir)],
"extra_compile_args": ["/openmp"],
}
)
else:
kwargs_for_extension.update(
{
"extra_compile_args": ["-fopenmp"],
"extra_link_args": ["-fopenmp"],
}
)
def create_cpp_source():
for f in source_codes + headers:
src_file = path.join("..", f)
tgt_file = path.join(cpp_dir, f)
# ensure the cpp-source directory is created
os.makedirs(path.dirname(tgt_file), exist_ok=True)
copyfile(src_file, tgt_file)
class CleanCommand(clean_cmd):
def run(self):
clean_cmd.run(self)
to_be_removed = ["build/", "dist/", "MANIFEST", cpp_dir, "{}.egg-info".format(PACKAGE_NAME), license_file]
to_be_removed += glob("./{}/{}.*".format(PACKAGE_DIR, dynamic_lib_name))
for root, dirs, files in os.walk(os.curdir, topdown=False):
if "__pycache__" in dirs:
to_be_removed.append(path.join(root, "__pycache__"))
to_be_removed += [path.join(root, f) for f in files if f.endswith(".pyc")]
for f in to_be_removed:
print("remove {}".format(f))
if f == ".":
continue
elif path.isfile(f):
os.remove(f)
elif path.isdir(f):
rmtree(f)
def main():
if not path.exists(cpp_dir):
create_cpp_source()
if not path.exists(license_file):
copyfile(license_source, license_file)
with open("README") as f:
long_description = f.read()
setup(
name=PACKAGE_NAME,
packages=[PACKAGE_DIR],
version=VERSION,
description="Python binding of LIBSVM",
long_description=long_description,
long_description_content_type="text/plain",
author="ML group @ National Taiwan University",
author_email="cjlin@csie.ntu.edu.tw",
url="https://www.csie.ntu.edu.tw/~cjlin/libsvm",
license=license_name,
install_requires=["scipy"],
ext_modules=[
Extension(
"{}.{}".format(PACKAGE_DIR, dynamic_lib_name), **kwargs_for_extension
)
],
cmdclass={"clean": CleanCommand},
)
if __name__ == "__main__":
    main()