-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathlinearSVM.hpp
208 lines (181 loc) · 8.37 KB
/
linearSVM.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
//**********************************************************************
//* This file is a part of the CANUPO project, a set of programs for *
//* classifying automatically 3D point clouds according to the local *
//* multi-scale dimensionality at each point. *
//* *
//* Author & Copyright: Nicolas Brodu <nicolas.brodu@numerimoire.net> *
//* *
//* This project is free software; you can redistribute it and/or *
//* modify it under the terms of the GNU Lesser General Public *
//* License as published by the Free Software Foundation; either *
//* version 2.1 of the License, or (at your option) any later version. *
//* *
//* This library is distributed in the hope that it will be useful, *
//* but WITHOUT ANY WARRANTY; without even the implied warranty of *
//* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
//* Lesser General Public License for more details. *
//* *
//* You should have received a copy of the GNU Lesser General Public *
//* License along with this library; if not, write to the Free *
//* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, *
//* MA 02110-1301 USA *
//* *
//**********************************************************************/
#ifndef CANUPO_LINEAR_SVM_HPP
#define CANUPO_LINEAR_SVM_HPP
#include <iostream>
#include <vector>
#include <limits>
#include "points.hpp"
#ifdef _OPENMP
#include <omp.h>
#include "dlib/svm_threaded.h"
#include "dlib/threads/multithreaded_object_extension.cpp"
#include "dlib/threads/threaded_object_extension.cpp"
#include "dlib/threads/threads_kernel_1.cpp"
#include "dlib/threads/threads_kernel_2.cpp"
#include "dlib/threads/threads_kernel_shared.cpp"
#include "dlib/threads/thread_pool_extension.cpp"
#else
#include "dlib/svm.h"
#endif
// Linear SVM classifier built on dlib.
// Depending on the SVM_NU_TRAINER compile-time switch it uses either dlib's
// nu-SVM trainer or the pegasos (stochastic sub-gradient) trainer. After
// training, the linear-kernel decision function is collapsed into a single
// hyperplane stored in `weights`, so classification is a plain dot product.
struct LinearSVM {

    // A sample is a dlib column vector of doubles; dimension fixed at runtime.
    typedef dlib::matrix<double, 0, 1> sample_type;
    // Linear kernel — this is what makes the hyperplane collapse in train() valid.
    typedef dlib::linear_kernel<sample_type> kernel_type;

    // Number of starting points for the 1-D hyperparameter search in
    // crossValidate(). When grid_size > 1 the grid loop itself is the OpenMP
    // parallel level; when it is 1, the inner cross-validation is threaded instead.
    int grid_size;
    // Batch fraction handed to dlib's batch_cached() wrapper around the
    // pegasos trainer (only used when SVM_NU_TRAINER is not defined).
    double trainer_batch_rate;

    LinearSVM(int _grid_size) : grid_size(_grid_size), trainer_batch_rate(0.1) {}

    // Objective functor maximised by dlib::find_max_single_variable in
    // crossValidate(): cross-validated score as a function of the
    // log-scale hyperparameter (log(nu) or log(lambda)).
    struct cross_validation_objective {
        cross_validation_objective (
            const std::vector<sample_type>& samples_,
            const std::vector<double>& labels_,
            int _nfolds,
            LinearSVM* _lsvm
        ) : samples(samples_), labels(labels_), nfolds(_nfolds), lsvm(_lsvm) {}

        // @param logarg log of the hyperparameter being optimised.
        // @return sum of the per-class cross-validation accuracies returned
        //         by cross_validate_trainer (a balanced score, higher is better).
        double operator() (double logarg) const {
            using namespace dlib;
            // see below for changes from dlib examples
            // The search runs in log space so it is scale-free; recover the
            // actual hyperparameter value here.
            const double arg = exp(logarg);
            matrix<double> result;
            // Make an SVM trainer and tell it what the parameters are supposed to be.
#ifdef SVM_NU_TRAINER
            svm_nu_trainer<kernel_type> trainer;
            trainer.set_kernel(kernel_type());
            trainer.set_nu(arg);
#if defined(_OPENMP)
            // multi-thread here only if not at the caller level
            // (caller parallelises the grid when grid_size > 1)
            if (lsvm->grid_size==1) {
                result = cross_validate_trainer_threaded(trainer, samples, labels, nfolds, omp_get_num_threads());
            } else
#endif
            result = cross_validate_trainer(trainer, samples, labels, nfolds);
#else
            // Default build: pegasos trainer; here arg is the lambda regulariser.
            svm_pegasos<kernel_type> trainer;
            trainer.set_kernel(kernel_type());
            trainer.set_lambda(arg);
#if defined(_OPENMP)
            // multi-thread here only if not at the caller level
            if (lsvm->grid_size==1) {
                result = cross_validate_trainer_threaded(batch_cached(trainer,lsvm->trainer_batch_rate), samples, labels, nfolds, omp_get_num_threads());
            } else
#endif
            result = cross_validate_trainer(batch_cached(trainer,lsvm->trainer_batch_rate), samples, labels, nfolds);
#endif
            return sum(result);
        }

        // Borrowed references: the caller must keep samples/labels alive for
        // the lifetime of this functor.
        const std::vector<sample_type>& samples;
        const std::vector<double>& labels;
        int nfolds;
        LinearSVM* lsvm;
    };

    // Select the SVM hyperparameter by nfolds-fold cross-validation:
    // runs find_max_single_variable from grid_size interior starting points
    // (in parallel under OpenMP) and keeps the best-scoring optimum.
    // @return the chosen hyperparameter (nu or lambda), converted back to
    //         linear scale with exp().
    double crossValidate(int nfolds, const std::vector<sample_type>& samples, const std::vector<double>& labels) {
        using namespace dlib;
        using namespace std;
        double best_score = -1;
        double best_logarg = log(1e-4);
        cout << "Optimising the SVM" << flush;
#ifdef SVM_NU_TRAINER
        // largest allowed nu: strictly below what's returned by maximum_nu
        double max_arg = 0.999*maximum_nu(labels);
        double min_arg = max_arg * 1e-3;
        double lmax = log(max_arg);
        double lmin = log(min_arg);
#else
        // arg is actually lambda
        double lmin = log(1e-6);
        double lmax = log(1e-2);
#endif
        // Each grid point is an independent local search; dynamic schedule
        // because individual searches take very different times to converge.
#pragma omp parallel for schedule(dynamic)
        for (int gidx = 1; gidx<=grid_size; ++gidx) {
            cout << "." << flush;
            // Interior grid node gidx of grid_size in [lmin, lmax] (endpoints excluded).
            double larg = lmin + (lmax - lmin) * gidx / (grid_size + 1.0);
            double score = find_max_single_variable(
                cross_validation_objective(samples, labels, nfolds, this), // Function to maximize
                larg, // starting point and result
                lmin, // lower bound (log scale; log(1e-6) in the pegasos build)
                lmax, // upper bound
                // precision (here on the sum of both class accuracies)
                2e-4,
                100 // max number of iterations
            );
            // best_score / best_logarg are shared across threads: serialise updates.
#pragma omp critical
            if (score > best_score) {
                best_score = score;
                best_logarg = larg;
            }
        }
        cout << endl;
        // score is the sum of the two per-class accuracies, hence the 0.5 factor.
        cout << "cross-validated balanced accuracy = " << 0.5 * best_score << endl;
        return (double)exp(best_logarg);
    }

    // Train the final classifier and store it as one calibrated hyperplane:
    // weights[0..dim-1] is the normal, weights[dim] the offset, already
    // scaled/shifted by the Platt calibration so that, per the comments
    // below, p(first class) = 1 / (1 + exp(predict(x))).
    // @param nfolds folds used by train_probabilistic_decision_function
    // @param nu     hyperparameter: nu for svm_nu_trainer, lambda for pegasos
    //               (typically the value returned by crossValidate()).
    void train(int nfolds, double nu, const std::vector<sample_type>& samples, const std::vector<double>& labels) {
        using namespace dlib;
#ifdef SVM_NU_TRAINER
        svm_nu_trainer<kernel_type> trainer;
        trainer.set_nu(nu);
        trainer.set_kernel(kernel_type());
#else
        svm_pegasos<kernel_type> pegasos_trainer;
        pegasos_trainer.set_lambda(nu);
        pegasos_trainer.set_kernel(kernel_type());
        batch_trainer<svm_pegasos<kernel_type> > trainer = batch_cached(pegasos_trainer,trainer_batch_rate);
#endif
        // NOTE(review): samples.back() is undefined behaviour if samples is
        // empty — callers must guarantee a non-empty training set.
        int dim = samples.back().size();
        // linear kernel: convert the decision function to an hyperplane rather than support vectors
        // This is equivalent but way more efficient for later scene classification
        //decision_function<kernel_type> decfun = trainer.train(samples, labels);
        probabilistic_decision_function<kernel_type> pdecfun = train_probabilistic_decision_function(trainer, samples, labels, nfolds);
        decision_function<kernel_type>& decfun = pdecfun.decision_funct;
        weights.clear();
        weights.resize(dim+1, 0);
        matrix<double> w(dim,1);
        w = 0;
        // Collapse the alpha-weighted basis vectors into a single normal vector w.
        for (int i=0; i<decfun.alpha.nr(); ++i) {
            w += decfun.alpha(i) * decfun.basis_vectors(i);
        }
        for (int i=0; i<dim; ++i) weights[i] = w(i);
        weights[dim] = -decfun.b;
        // p(x) = 1/(1+exp(alpha*d(x)+beta))
        // with d(x) the oriented dist given by the decision function
        // linear kernel: equivalently shift and scale the values
        // d(x) = w.x + w[dim]
        // So in the final classifier we have d(x)=0 matching the probabilistic decfun
        for (int i=0; i<=dim; ++i) weights[i] *= pdecfun.alpha;
        weights[dim] += pdecfun.beta;
        // revert the decision function so we compute proba to be in the -1 class, i.e.
        // the first class given to the classifier program
        for (int i=0; i<=dim; ++i) weights[i] = -weights[i];
        // note: we now have comparable proba for dx and dy
        // as 1 / (1+exp(dx)) = 1 / (1+exp(dy)) means same dx and dy
        // in this new space
        // => consistent orthogonal axis
    }

    // Evaluate the stored hyperplane on one sample: returns
    // weights . data + offset (the calibrated signed distance).
    // Only meaningful after train() has filled `weights`.
    double predict(const sample_type& data) {
        int dim = weights.size()-1;
        double ret = weights[dim];
        for (int d=0; d<dim; ++d) ret += weights[d] * data(d);
        return ret;
    }

    // Hyperplane: weights[0..dim-1] = normal coefficients, weights[dim] = offset.
    std::vector<double> weights;
};
#endif