'''
Author: Kevin Wang
Last updated: 6/5/16 by Sanket Satpathy
Used with Python 2.7
Description:
A general-purpose face recognizer using the VGG net for feature extraction. Requires
Lasagne for running the neural net, along with its other dependencies.
Supports training on a set of single-sample images, which are projected into a
128-dimensional feature space, and efficient nearest neighbor search in that feature
space for prediction of new faces.
'''
import time
import argparse
import cv2
import os
import numpy as np
np.set_printoptions(precision=2)
import sys
sys.path.insert(0, './vgg_feature')
import vgg_feature
from sklearn.neighbors import NearestNeighbors
# flag for saving the computed feature arrays (1) or loading previously saved ones (0)
saveon = 1

# data augmentation toggles used during training
include_flip = False  # True
include_truncation = False  # True
include_rotation = False  # True
include_blur = False  # True
include_horizontal_blur = True
include_brightness_gradient = False
def rotate(img, angle):
    # rotates image by angle (with zero-padding)
    h, w = img.shape[:2]
    center = (w/2, h/2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(img, M, (w, h))
def brightness_gradient(img, n=2):
    # adds a horizontal brightness gradient to image
    # parameter n controls the nonlinearity according to which shading is applied
    # this effect attempts to mimic lighting conditions in the testing environment
    gradient = np.linspace(0.1, 1, img.shape[1])
    gradient = gradient ** n  # nonlinearity (multiplied to create shading)
    # gradient = 1 / (1 + np.exp(-20 * (gradient - 0.5)))
    x = np.array([], dtype='uint8').reshape((img.shape[0], 0, img.shape[2]))
    for i in xrange(img.shape[1]):
        shade = (img[:, i, :] * gradient[i]).reshape((img.shape[0], 1, img.shape[2])).astype('uint8')
        x = np.hstack((x, shade))
    return x
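
# A vectorized sketch of the same shading step (assuming an H x W x C uint8 input
# img and the per-column gradient vector computed above) would be:
#     shaded = (img * gradient[np.newaxis, :, np.newaxis]).astype('uint8')
# This avoids the per-column hstack loop and produces the same result.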
class VGGRecognizer:
    # maps point to label in NN implementation
    ylabels = None
    # if the recognizer has been trained before
    initialized = False
    # reference to the VGG neural net
    net = None
    # nearest neighbors sklearn object
    nbrs = None
    # number of nearest neighbors to use
    # knn = 1

    # constructor -- use the default parameters for the best classifier
    # dire contains the location of the pickled weights
    def __init__(self, dire='./vgg_feature/', knn=1):
        self.initialized = False
        self.net = vgg_feature.load_weights(dire=dire)
        self.knn = knn

    # train the recognizer on the given set of images, with corresponding labels.
    # return a copy of the labels
    def train(self, images, labels):
        # Mark that we have trained now
        if not self.initialized:
            self.initialized = True
        else:
            print 'Warning: Training again will override previous faces'
        self.ylabels = []
        tr_features = None
        if saveon == 1:
            for i, l in zip(images, labels):
                print 'VGG: ', l
                # expand grayscale images to 3 channels
                if len(i.shape) == 2:
                    i = i[:, :, np.newaxis]
                    rgbi = np.repeat(i, 3, axis=2)
                else:
                    rgbi = i
                if rgbi is not None:
                    feature = vgg_feature.get_feature(rgbi, self.net)
                    if tr_features is None:
                        tr_features = feature
                    else:
                        tr_features = np.vstack((tr_features, feature))
                    self.ylabels.append(l)
                    # include the LR flip in training
                    if include_flip:
                        feature = vgg_feature.get_feature(np.fliplr(rgbi), self.net)
                        tr_features = np.vstack((tr_features, feature))
                        self.ylabels.append(l)
                    if include_truncation:
                        for j in xrange(1, 6):
                            feature = vgg_feature.get_feature(rgbi[j * rgbi.shape[0] / 10:, :, :], self.net)  # remove upper face
                            tr_features = np.vstack((tr_features, feature))
                            self.ylabels.append(l)
                            feature = vgg_feature.get_feature(rgbi[:-j * rgbi.shape[0] / 10, :, :], self.net)  # remove lower face
                            tr_features = np.vstack((tr_features, feature))
                            self.ylabels.append(l)
                    if include_rotation:
                        for angle in [-10, 10]:
                            feature = vgg_feature.get_feature(rotate(rgbi, angle), self.net)  # rotate face
                            tr_features = np.vstack((tr_features, feature))
                            self.ylabels.append(l)
                    if include_blur:
                        for blur_radius in [10]:
                            feature = vgg_feature.get_feature(cv2.blur(rgbi, (blur_radius, blur_radius)), self.net)  # blur face
                            tr_features = np.vstack((tr_features, feature))
                            self.ylabels.append(l)
                    if include_horizontal_blur:
                        for blur_radius in [10]:
                            # horizontal motion-blur kernel: one row of ones inside a square zero matrix
                            side = 2 * blur_radius + 1
                            kernel = np.vstack((np.zeros((blur_radius, side), np.float32),
                                                np.ones((1, side), np.float32) / side,
                                                np.zeros((blur_radius, side), np.float32)))
                            img = cv2.filter2D(rgbi, -1, kernel)
                            feature = vgg_feature.get_feature(img, self.net)  # blur face
                            tr_features = np.vstack((tr_features, feature))
                            self.ylabels.append(l)
                    if include_brightness_gradient:
                        feature = vgg_feature.get_feature(brightness_gradient(rgbi), self.net)
                        tr_features = np.vstack((tr_features, feature))
                        self.ylabels.append(l)
            np.save('vgg_ylabels.npy', self.ylabels)
            np.save('vgg_features.npy', tr_features)
        else:
            self.ylabels = np.load('vgg_ylabels.npy')
            tr_features = np.load('vgg_features.npy')
        self.nbrs = NearestNeighbors(n_neighbors=self.knn, algorithm='ball_tree').fit(tr_features)
        return np.copy(self.ylabels)

    # predict the class of the input image. returns None if it runs into errors
    # during the feature extraction, otherwise returns the label of the nearest point
    def predict(self, image):
        if not self.initialized:
            print 'Train before predicting!'
            return
        pred_feature = vgg_feature.get_feature(image, self.net)
        dist, ind = self.nbrs.kneighbors(pred_feature)
        return self.ylabels[ind[0, 0]]

    # verbose prediction of the class of the input image. returns None if it runs
    # into errors during the embedding, otherwise returns the distances
    # and indices of the nearest neighbor search
    def verbose_predict(self, image):
        if not self.initialized:
            print 'Train before predicting!'
            return
        now = time.time()
        pred_feature = vgg_feature.get_feature(image, self.net)
        print '\tTime to compute VGG feature : {0:.2f} s'.format(time.time() - now)
        now = time.time()
        dist, ind = self.nbrs.kneighbors(pred_feature)
        print '\tTime to compute nearest neighbors : {0:.2f} s'.format(time.time() - now)
        return dist, ind
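

# Minimal usage sketch of the train/predict API. The file names 'alice.jpg' and
# 'bob.jpg' are hypothetical placeholders for cropped face images; it is assumed
# that the pickled VGG weights live under ./vgg_feature/.
if __name__ == '__main__':
    recognizer = VGGRecognizer(dire='./vgg_feature/', knn=1)
    faces = [cv2.imread('alice.jpg'), cv2.imread('bob.jpg')]  # hypothetical inputs
    names = ['alice', 'bob']
    recognizer.train(faces, names)  # embed the faces and build the nearest-neighbor index
    print recognizer.predict(cv2.imread('alice.jpg'))  # label of the nearest stored face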