SVM Classifier C++ 1.0.0
High-performance Support Vector Machine classifier with a scikit-learn-compatible API
data_converter.cpp
1#include "svm_classifier/data_converter.hpp"
2#include "svm.h" // libsvm
3#include "linear.h" // liblinear
4#include <stdexcept>
5#include <iostream>
6#include <cmath>
7
8namespace svm_classifier {
9
DataConverter::DataConverter()
    : n_features_(0)
    , n_samples_(0)
    , sparse_threshold_(1e-8)
{
}

DataConverter::~DataConverter()
{
    cleanup(); // destructor is documented to clean up all allocated memory
}

std::unique_ptr<svm_problem> DataConverter::to_svm_problem(const torch::Tensor& X,
                                                           const torch::Tensor& y)
{
    validate_tensors(X, y);

    auto X_cpu = ensure_cpu_tensor(X);

    n_samples_ = X_cpu.size(0);
    n_features_ = X_cpu.size(1);

    // Convert tensor data to svm_node structures
    svm_nodes_storage_ = tensor_to_svm_nodes(X_cpu);

    // Prepare pointers for svm_problem
    svm_x_space_.clear();
    svm_x_space_.reserve(n_samples_);

    for (auto& nodes : svm_nodes_storage_) {
        svm_x_space_.push_back(nodes.data());
    }

    // Extract labels if provided
    if (y.defined() && y.numel() > 0) {
        svm_y_space_ = extract_labels(y);
    } else {
        svm_y_space_.clear();
        svm_y_space_.resize(n_samples_, 0.0); // Dummy labels for prediction
    }

    // Create svm_problem
    auto problem = std::make_unique<svm_problem>();
    problem->l = n_samples_;
    problem->x = svm_x_space_.data();
    problem->y = svm_y_space_.data();

    return problem;
}

std::unique_ptr<problem> DataConverter::to_linear_problem(const torch::Tensor& X,
                                                          const torch::Tensor& y)
{
    validate_tensors(X, y);

    auto X_cpu = ensure_cpu_tensor(X);

    n_samples_ = X_cpu.size(0);
    n_features_ = X_cpu.size(1);

    // Convert tensor data to feature_node structures
    linear_nodes_storage_ = tensor_to_linear_nodes(X_cpu);

    // Prepare pointers for problem
    linear_x_space_.clear();
    linear_x_space_.reserve(n_samples_);

    for (auto& nodes : linear_nodes_storage_) {
        linear_x_space_.push_back(nodes.data());
    }

    // Extract labels if provided
    if (y.defined() && y.numel() > 0) {
        linear_y_space_ = extract_labels(y);
    } else {
        linear_y_space_.clear();
        linear_y_space_.resize(n_samples_, 0.0); // Dummy labels for prediction
    }

    // Create problem
    auto linear_problem = std::make_unique<problem>();
    linear_problem->l = n_samples_;
    linear_problem->n = n_features_;
    linear_problem->x = linear_x_space_.data();
    linear_problem->y = linear_y_space_.data();
    linear_problem->bias = -1; // No bias term by default

    return linear_problem;
}

svm_node* DataConverter::to_svm_node(const torch::Tensor& sample)
{
    validate_tensor_properties(sample, 1, "sample");

    auto sample_cpu = ensure_cpu_tensor(sample);
    single_svm_nodes_ = sample_to_svm_nodes(sample_cpu);

    return single_svm_nodes_.data();
}

feature_node* DataConverter::to_feature_node(const torch::Tensor& sample)
{
    validate_tensor_properties(sample, 1, "sample");

    auto sample_cpu = ensure_cpu_tensor(sample);
    single_linear_nodes_ = sample_to_linear_nodes(sample_cpu);

    return single_linear_nodes_.data();
}

torch::Tensor DataConverter::from_predictions(const std::vector<double>& predictions)
{
    auto options = torch::TensorOptions().dtype(torch::kInt32);
    auto tensor = torch::zeros({ static_cast<int64_t>(predictions.size()) }, options);

    for (size_t i = 0; i < predictions.size(); ++i) {
        tensor[i] = static_cast<int>(predictions[i]);
    }

    return tensor;
}

torch::Tensor DataConverter::from_probabilities(const std::vector<std::vector<double>>& probabilities)
{
    if (probabilities.empty()) {
        return torch::empty({ 0, 0 });
    }

    int n_samples = static_cast<int>(probabilities.size());
    int n_classes = static_cast<int>(probabilities[0].size());

    auto tensor = torch::zeros({ n_samples, n_classes }, torch::kFloat64);

    for (int i = 0; i < n_samples; ++i) {
        for (int j = 0; j < n_classes; ++j) {
            tensor[i][j] = probabilities[i][j];
        }
    }

    return tensor;
}

torch::Tensor DataConverter::from_decision_values(const std::vector<std::vector<double>>& decision_values)
{
    if (decision_values.empty()) {
        return torch::empty({ 0, 0 });
    }

    int n_samples = static_cast<int>(decision_values.size());
    int n_values = static_cast<int>(decision_values[0].size());

    auto tensor = torch::zeros({ n_samples, n_values }, torch::kFloat64);

    for (int i = 0; i < n_samples; ++i) {
        for (int j = 0; j < n_values; ++j) {
            tensor[i][j] = decision_values[i][j];
        }
    }

    return tensor;
}

void DataConverter::validate_tensors(const torch::Tensor& X, const torch::Tensor& y)
{
    validate_tensor_properties(X, 2, "X");

    if (y.defined() && y.numel() > 0) {
        validate_tensor_properties(y, 1, "y");

        // Check that the number of samples matches the number of labels
        if (X.size(0) != y.size(0)) {
            throw std::invalid_argument(
                "Number of samples in X (" + std::to_string(X.size(0)) +
                ") does not match number of labels in y (" + std::to_string(y.size(0)) + ")"
            );
        }
    }

    // Check for reasonable dimensions
    if (X.size(0) == 0) {
        throw std::invalid_argument("X cannot have 0 samples");
    }

    if (X.size(1) == 0) {
        throw std::invalid_argument("X cannot have 0 features");
    }
}

void DataConverter::cleanup()
{
    svm_nodes_storage_.clear();
    svm_x_space_.clear();
    svm_y_space_.clear();

    linear_nodes_storage_.clear();
    linear_x_space_.clear();
    linear_y_space_.clear();

    single_svm_nodes_.clear();
    single_linear_nodes_.clear();

    n_features_ = 0;
    n_samples_ = 0;
}

std::vector<std::vector<svm_node>> DataConverter::tensor_to_svm_nodes(const torch::Tensor& X)
{
    std::vector<std::vector<svm_node>> nodes_storage;
    nodes_storage.reserve(X.size(0));

    for (int i = 0; i < X.size(0); ++i) {
        nodes_storage.push_back(sample_to_svm_nodes(X[i]));
    }

    return nodes_storage;
}

std::vector<std::vector<feature_node>> DataConverter::tensor_to_linear_nodes(const torch::Tensor& X)
{
    std::vector<std::vector<feature_node>> nodes_storage;
    nodes_storage.reserve(X.size(0));

    for (int i = 0; i < X.size(0); ++i) {
        nodes_storage.push_back(sample_to_linear_nodes(X[i]));
    }

    return nodes_storage;
}

std::vector<svm_node> DataConverter::sample_to_svm_nodes(const torch::Tensor& sample)
{
    std::vector<svm_node> nodes;

    auto sample_acc = sample.accessor<float, 1>();

    // Reserve space (worst case: all features are non-zero)
    nodes.reserve(sample.size(0) + 1); // +1 for terminator

    for (int j = 0; j < sample.size(0); ++j) {
        double value = static_cast<double>(sample_acc[j]);

        // Store only non-zero entries; values below sparse_threshold_ are treated as zero
        if (std::abs(value) > sparse_threshold_) {
            svm_node node;
            node.index = j + 1; // libsvm uses 1-based indexing
            node.value = value;
            nodes.push_back(node);
        }
    }

    // Add terminator
    svm_node terminator;
    terminator.index = -1;
    terminator.value = 0;
    nodes.push_back(terminator);

    return nodes;
}

std::vector<feature_node> DataConverter::sample_to_linear_nodes(const torch::Tensor& sample)
{
    std::vector<feature_node> nodes;

    auto sample_acc = sample.accessor<float, 1>();

    // Reserve space (worst case: all features are non-zero)
    nodes.reserve(sample.size(0) + 1); // +1 for terminator

    for (int j = 0; j < sample.size(0); ++j) {
        double value = static_cast<double>(sample_acc[j]);

        // Store only non-zero entries; values below sparse_threshold_ are treated as zero
        if (std::abs(value) > sparse_threshold_) {
            feature_node node;
            node.index = j + 1; // liblinear uses 1-based indexing
            node.value = value;
            nodes.push_back(node);
        }
    }

    // Add terminator
    feature_node terminator;
    terminator.index = -1;
    terminator.value = 0;
    nodes.push_back(terminator);

    return nodes;
}

std::vector<double> DataConverter::extract_labels(const torch::Tensor& y)
{
    auto y_cpu = ensure_cpu_tensor(y);
    std::vector<double> labels;
    labels.reserve(y_cpu.size(0));

    // Handle different tensor types
    if (y_cpu.dtype() == torch::kInt32) {
        auto y_acc = y_cpu.accessor<int32_t, 1>();
        for (int i = 0; i < y_cpu.size(0); ++i) {
            labels.push_back(static_cast<double>(y_acc[i]));
        }
    } else if (y_cpu.dtype() == torch::kInt64) {
        auto y_acc = y_cpu.accessor<int64_t, 1>();
        for (int i = 0; i < y_cpu.size(0); ++i) {
            labels.push_back(static_cast<double>(y_acc[i]));
        }
    } else if (y_cpu.dtype() == torch::kFloat32) {
        auto y_acc = y_cpu.accessor<float, 1>();
        for (int i = 0; i < y_cpu.size(0); ++i) {
            labels.push_back(static_cast<double>(y_acc[i]));
        }
    } else if (y_cpu.dtype() == torch::kFloat64) {
        auto y_acc = y_cpu.accessor<double, 1>();
        for (int i = 0; i < y_cpu.size(0); ++i) {
            labels.push_back(y_acc[i]);
        }
    } else {
        throw std::invalid_argument("Unsupported label tensor dtype");
    }

    return labels;
}

torch::Tensor DataConverter::ensure_cpu_tensor(const torch::Tensor& tensor)
{
    auto result = tensor;

    // Move to CPU if needed
    if (result.device().type() != torch::kCPU) {
        result = result.to(torch::kCPU);
    }

    // Convert to float32 if not already (the accessors above assume float32);
    // this must also happen for tensors that were just moved off the GPU
    if (result.dtype() != torch::kFloat32) {
        result = result.to(torch::kFloat32);
    }

    return result;
}

void DataConverter::validate_tensor_properties(const torch::Tensor& tensor,
                                               int expected_dims,
                                               const std::string& name)
{
    if (!tensor.defined()) {
        throw std::invalid_argument(name + " tensor is not defined");
    }

    if (tensor.dim() != expected_dims) {
        throw std::invalid_argument(
            name + " must have " + std::to_string(expected_dims) +
            " dimensions, got " + std::to_string(tensor.dim())
        );
    }

    if (tensor.numel() == 0) {
        throw std::invalid_argument(name + " tensor cannot be empty");
    }

    // Check for NaN or Inf values
    if (torch::any(torch::isnan(tensor)).item<bool>()) {
        throw std::invalid_argument(name + " contains NaN values");
    }

    if (torch::any(torch::isinf(tensor)).item<bool>()) {
        throw std::invalid_argument(name + " contains infinite values");
    }
}

} // namespace svm_classifier

DataConverter member summary

DataConverter()
    Default constructor.
~DataConverter()
    Destructor - cleans up allocated memory.
std::unique_ptr<svm_problem> to_svm_problem(const torch::Tensor& X, const torch::Tensor& y = torch::Tensor())
    Convert PyTorch tensors to libsvm format.
std::unique_ptr<problem> to_linear_problem(const torch::Tensor& X, const torch::Tensor& y = torch::Tensor())
    Convert PyTorch tensors to liblinear format.
svm_node* to_svm_node(const torch::Tensor& sample)
    Convert single sample to libsvm format.
feature_node* to_feature_node(const torch::Tensor& sample)
    Convert single sample to liblinear format.
torch::Tensor from_predictions(const std::vector<double>& predictions)
    Convert predictions back to PyTorch tensor.
torch::Tensor from_probabilities(const std::vector<std::vector<double>>& probabilities)
    Convert probabilities back to PyTorch tensor.
torch::Tensor from_decision_values(const std::vector<std::vector<double>>& decision_values)
    Convert decision values back to PyTorch tensor.
void validate_tensors(const torch::Tensor& X, const torch::Tensor& y = torch::Tensor())
    Validate input tensors.
void cleanup()
    Clean up all allocated memory.
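
For orientation, below is a minimal sketch of how the converter is typically driven end to end. The DataConverter calls match the listing above; the svm_parameter values and the svm_check_parameter/svm_train/svm_predict calls are ordinary libsvm usage chosen for illustration, not taken from this file. to_linear_problem() plays the analogous role for liblinear.

#include "svm_classifier/data_converter.hpp"
#include "svm.h" // libsvm
#include <torch/torch.h>

int main()
{
    // Toy data: 4 samples, 2 features, binary labels.
    auto X = torch::tensor({ { 0.0f, 1.0f }, { 1.0f, 0.0f }, { 1.0f, 1.0f }, { 0.0f, 0.0f } });
    auto y = torch::tensor({ 0, 1, 1, 0 }, torch::kInt64);

    svm_classifier::DataConverter converter;

    // The converter owns all node and label buffers, so it must outlive
    // the problem and any model trained on it.
    auto problem = converter.to_svm_problem(X, y);

    // Illustrative libsvm parameters (values are assumptions, not from this file).
    svm_parameter param{};
    param.svm_type = C_SVC;
    param.kernel_type = RBF;
    param.C = 1.0;
    param.gamma = 0.5;
    param.cache_size = 100;
    param.eps = 1e-3;

    if (const char* error = svm_check_parameter(problem.get(), &param)) {
        return 1; // invalid parameter combination
    }

    svm_model* model = svm_train(problem.get(), &param);

    // Single-sample prediction goes through the sparse conversion path.
    double prediction = svm_predict(model, converter.to_svm_node(X[0]));

    svm_free_and_destroy_model(&model);
    return static_cast<int>(prediction);
}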
