# -*- coding: utf-8 -*-
"""
This module deals with processing of video and images.
__print_progress() - Show the progress during a running of the video extraction in video_to_frames()
__extract_frames() - Used for the actual extraction of frames from video called from video_to_frames()
video_to_frames() - Takes a video, extract images, and save them
UseDirectory2CrtTrainFrames() - Extract a video to a specific path
CopyExtractFrames2TrainDir() - After the user chooses which image to leave in the directory then this function moves
the images to the relevant train path
preparePixelLibModel() - Read the model from the pixelLib repository.
Segment_a_Video() - Read a video and color all the pixels that are in the desired object (segmentation).
Segment_an_Image() - Read an image and color all the pixels that are in the desired object (segmentation).
ChangeBackg2Gray - Gets a video and then change all the pixels that are not the desired object to grayscale.
ChangeBackground - Gets a background image & a video. Change the pixels that aren't the object to the background image.
PrepareSSDMobile() - Read the SSD model from the repository
FindObjectInImage() - Gets an image and return the box around the desired object
CreateBoxAndLabelVideo() - Gets a video and return it rendered with a box around the desired object
CrtBoxAndLblImg() - Get an image ans the desired object and return the image with a box and a label of the class
"""
import numpy as np
import os
import sys
import cv2
import multiprocessing
from concurrent.futures import ProcessPoolExecutor, as_completed
from shutil import copyfile
import urllib.request
from pixellib.instance import instance_segmentation
from pixellib.tune_bg import alter_bg

def __print_progress(iteration, total, prefix='', suffix='', decimals=3, bar_length=100):
    """
    Call in a loop to create a standard-out progress bar
    :param iteration: current iteration
    :param total: total iterations
    :param prefix: prefix string
    :param suffix: suffix string
    :param decimals: positive number of decimals in percent complete
    :param bar_length: character length of bar
    :return: None
    """
    if total > 0:
        format_str = "{0:." + str(decimals) + "f}"  # format the % done number string
        percents = format_str.format(100 * (iteration / float(total)))  # calculate the % done
        filled_length = int(round(bar_length * iteration / float(total)))  # calculate the filled bar length
        bar = '#' * filled_length + '-' * (bar_length - filled_length)  # generate the bar string
        sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))  # write out the bar
        sys.stdout.flush()  # flush to stdout

def __extract_frames(video_path, frames_dir, overwrite=True, start=-1, end=-1, every=1, DifferntDirectory=True,
                     Name=''):
    """
    Extract frames from a video using OpenCV's VideoCapture
    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param overwrite: overwrite frames that already exist?
    :param start: start frame
    :param end: end frame
    :param every: frame spacing
    :param DifferntDirectory: if True then every video gets its own directory
    :param Name: if DifferntDirectory=False then this is prepended to the file name
    :return: count of images saved
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_dir, video_filename = os.path.split(video_path)  # get the video directory and filename from the path
    assert os.path.exists(video_path)  # assert the video file exists
    capture = cv2.VideoCapture(video_path)  # open the video using OpenCV
    if start < 0:  # if start isn't specified assume 0
        start = 0
    if end < 0:  # if end isn't specified assume the end of the video
        end = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.set(cv2.CAP_PROP_POS_FRAMES, start)  # set the starting frame of the capture
    frame = start  # keep track of which frame we are up to, starting from start
    while_safety = 0  # a safety counter to ensure we don't enter an infinite while loop (hopefully we won't need it)
    saved_count = 0  # a count of how many frames we have saved
    while frame < end:  # loop through the frames until the end
        ret, image = capture.read()  # read an image from the capture
        if while_safety > 500:  # break the while loop if our safety counter maxes out at 500
            break
        # sometimes OpenCV reads Nones during a video, in which case we want to just skip
        if not ret or image is None:  # if we get a bad return flag or the image we read is None, don't save
            while_safety += 1  # add 1 to our while safety, since we skip before incrementing our frame variable
            continue  # skip
        if frame % every == 0:  # if this is a frame we want to write out based on the 'every' argument
            while_safety = 0  # reset the safety count
            if DifferntDirectory:
                save_path = os.path.join(frames_dir, video_filename,
                                         "{:010d}.jpg".format(frame))  # create the save path
            else:
                OutFileName = Name + "{:010d}.jpg".format(frame)
                save_path = os.path.join(frames_dir, OutFileName)  # create the save path
            if not os.path.exists(save_path) or overwrite:  # if it doesn't exist or we want to overwrite anyway
                cv2.imwrite(save_path, image)  # save the extracted image
                saved_count += 1  # increment our counter by one
        frame += 1  # increment our frame count
    capture.release()  # after the while loop has finished close the capture
    return saved_count  # and return the count of the images we saved

def video_to_frames(video_path, frames_dir, overwrite=True, every=1, chunk_size=1000, DifferntDirectory=True, Name=''):
    """
    Extracts the frames from a video using multiprocessing
    :param video_path: path to the video
    :param frames_dir: directory to save the frames
    :param overwrite: overwrite frames if they exist?
    :param every: extract every this many frames
    :param chunk_size: how many frames per chunk (each chunk is handled by one CPU core process)
    :param DifferntDirectory: if True then every video gets its own directory
    :param Name: if DifferntDirectory=False then this is prepended to the file name
    :return: path to the directory where the frames were saved, or None if it fails
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_dir, video_filename = os.path.split(video_path)  # get the video directory and filename from the path
    if DifferntDirectory:
        # make a directory to save the frames in; it is a sub dir of frames_dir with the video name
        os.makedirs(os.path.join(frames_dir, video_filename), exist_ok=True)
    capture = cv2.VideoCapture(video_path)  # load the video
    total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # get its total frame count
    print(
        'Number of frames for ' + video_filename + ': ' + str(total) + '. Extracting every ' + str(every) + ' frames.')
    capture.release()  # release the capture straight away
    if total < 1:  # if the video has no frames, it might be an OpenCV error
        print("Video has no frames. Check your OpenCV + ffmpeg installation")
        return None  # return None
    frame_chunks = [[i, i + chunk_size] for i in range(0, total, chunk_size)]  # split the frames into chunk lists
    # make sure the last chunk has the correct end frame; this also handles the case chunk_size > total
    frame_chunks[-1][-1] = min(frame_chunks[-1][-1], total)
    prefix_str = "Extracting frames from {}".format(video_filename)  # a prefix string for the progress bar
    # execute across multiple cpu cores to speed up processing
    with ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
        futures = [
            executor.submit(__extract_frames, video_path, frames_dir, overwrite, f[0], f[1], every, DifferntDirectory,
                            Name)
            for f in frame_chunks]  # submit the processes: __extract_frames(...)
        for i, f in enumerate(as_completed(futures)):  # as each process completes
            __print_progress(i, len(frame_chunks) - 1, prefix=prefix_str, suffix='Complete')  # print its progress
    if DifferntDirectory:
        return os.path.join(frames_dir, video_filename)  # when done return the directory containing the frames
    else:
        return frames_dir  # when done return the directory containing the frames

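# A minimal usage sketch for video_to_frames(). The file name 'input.mp4' and the
# directory './frames' are hypothetical placeholders; adjust them to your setup.
# Wrapped in a function so importing this module stays side-effect free.
def _demo_video_to_frames():
    # Save every 10th frame of the clip into its own sub directory under ./frames
    out_dir = video_to_frames(video_path='input.mp4', frames_dir='./frames',
                              overwrite=True, every=10, chunk_size=1000)
    print('Frames were saved to: ' + str(out_dir))
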
def UseDirectory2CrtTrainFrames(Path2InputDirectory, Path2Output, everyXFrames=10):
    """
    Extract images from every video in a directory (every X frames) and put them in an output directory
    :param Path2InputDirectory: string. path to the input directory
    :param Path2Output: string. path to the output directory
    :param everyXFrames: int. the number of frames between each extracted image
    :return: Nothing
    """
    counter = 0
    for f in os.listdir(Path2InputDirectory):
        file_path = os.path.join(Path2InputDirectory, f)
        counter = counter + 1
        VidName = 'Vid' + str(counter) + '_'
        if os.path.isfile(file_path):
            video_to_frames(video_path=file_path, frames_dir=Path2Output, overwrite=True, every=everyXFrames,
                            chunk_size=1000, DifferntDirectory=False, Name=VidName)

def CopyExtractFrames2TrainDir(Path2InputDirectory, Path2Output):
    """
    After extracting images from a video and after the user has filtered them, the images are copied to the output path
    :param Path2InputDirectory: string. input directory
    :param Path2Output: string. output directory
    :return: Nothing
    """
    counter = 0
    for f in os.listdir(Path2InputDirectory):
        inputName = os.path.join(Path2InputDirectory, f)
        OutputName = os.path.join(Path2Output, f)
        copyfile(inputName, OutputName)
        counter = counter + 1
    print('Files copied: ' + str(counter))

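# A minimal sketch of the training-data workflow built from the two helpers above.
# All paths here ('./raw_videos', './candidate_frames', './train') are hypothetical
# placeholders; the intended flow is: extract frames, manually delete bad images,
# then copy the survivors into the training directory.
def _demo_prepare_train_frames():
    UseDirectory2CrtTrainFrames('./raw_videos', './candidate_frames', everyXFrames=10)
    # ... manually review ./candidate_frames and delete unwanted images here ...
    CopyExtractFrames2TrainDir('./candidate_frames', './train')
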
# The following function uses segmentation models that can detect classes in the image.
# Classes to find: person, bus, car, aeroplane, bicycle, motorbike, bird, boat, bottle, cat, chair, cow,
# diningtable, dog, horse, pottedplant, sheep, sofa, train, tv
def preparePixelLibModel(Modeltype='segmentation'):
    """
    This function retrieves the right model from the GitHub repository.
    Currently it can get either a segmentation model or a model that changes the background.
    :param Modeltype: string. 'segmentation' for coloring object pixels. 'Background' for changing the background.
    :return: Nothing
    """
    if Modeltype == 'segmentation':
        urllib.request.urlretrieve('https://github.com/ayoolaolafenwa/PixelLib/releases/download/1.2/mask_rcnn_coco.h5',
                                   'mask_rcnn_coco.h5')
    elif Modeltype == 'Background':
        urllib.request.urlretrieve(
            'https://github.com/ayoolaolafenwa/PixelLib/releases/download/1.1/xception_pascalvoc.pb',
            'xception_pascalvoc.pb')

def Segment_a_Video(PathInput, Pathoutput, AddBox=False):
    """
    This function takes a video and colors the relevant object pixels
    :param PathInput: string. Input path
    :param Pathoutput: string. Output path
    :param AddBox: bool. If True then add boxes around the objects
    :return: Nothing
    """
    cap = cv2.VideoCapture(PathInput)  # open the input video just to read its frame rate
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    cap.release()
    print('Pathoutput:' + str(Pathoutput))
    segment_video = instance_segmentation(infer_speed="fast")
    segment_video.load_model("mask_rcnn_coco.h5")
    segment_video.process_video(PathInput, show_bboxes=AddBox, frames_per_second=fps, output_video_name=Pathoutput)

def Segment_an_Image(PathInput, Pathoutput):
    """
    Color the pixels in an image where there are objects
    :param PathInput: string. Path input
    :param Pathoutput: string. Path output
    :return: Nothing
    """
    segment_image = instance_segmentation()
    segment_image.load_model("mask_rcnn_coco.h5")
    segment_image.segmentImage(PathInput, output_image_name=Pathoutput)

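# A minimal sketch tying the PixelLib helpers together. 'clip.mp4', 'photo.jpg' and
# the output names are hypothetical placeholders; the model file is downloaded once
# into the working directory and then reused by both calls.
def _demo_segmentation():
    preparePixelLibModel(Modeltype='segmentation')  # fetch mask_rcnn_coco.h5 once
    Segment_a_Video('clip.mp4', 'clip_segmented.mp4', AddBox=True)
    Segment_an_Image('photo.jpg', 'photo_segmented.jpg')
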
# works only on people
def ChangeBackg2Gray(PathInput, Pathoutput):
    """
    Get a video and grayscale the background, leaving the objects in color
    :param PathInput: string. Path input
    :param Pathoutput: string. Path output
    :return: Nothing
    """
    cap = cv2.VideoCapture(PathInput)  # open the input video just to read its frame rate
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    cap.release()
    change_bg = alter_bg(model_type="pb")
    change_bg.load_pascalvoc_model("xception_pascalvoc.pb")
    change_bg.gray_video(PathInput, frames_per_second=fps, output_video_name=Pathoutput, detect='person')

# works only on people
def ChangeBackground(BackgroundImage, PathInput, Pathoutput):
    """
    Change the background of a video to a specific image. The detected objects in the frames do not change
    :param BackgroundImage: string. Path to the image to set as the background
    :param PathInput: string. Path input
    :param Pathoutput: string. Path output
    :return: Nothing
    """
    cap = cv2.VideoCapture(PathInput)  # open the input video just to read its frame rate
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    cap.release()
    change_bg = alter_bg(model_type="pb")
    change_bg.load_pascalvoc_model("xception_pascalvoc.pb")
    change_bg.change_video_bg(PathInput, BackgroundImage, frames_per_second=fps, output_video_name=Pathoutput,
                              detect='person')

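# A minimal sketch of the background-alteration helpers. The file names below are
# hypothetical placeholders; both helpers assume preparePixelLibModel('Background')
# has already downloaded xception_pascalvoc.pb into the working directory.
def _demo_change_background():
    preparePixelLibModel(Modeltype='Background')  # fetch xception_pascalvoc.pb once
    ChangeBackg2Gray('clip.mp4', 'clip_gray_bg.mp4')
    ChangeBackground('beach.jpg', 'clip.mp4', 'clip_beach_bg.mp4')
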
# The following functions relate to object detection using SSD and MobileNet
# SSD = Single Shot Detector
def PrepareSSDMobile():
    """
    Get the object detection model from its repository
    :return: cv2.dnn_Net object that contains the model for object detection
    """
    # download our serialized model and load it from disk
    print("[INFO] loading model...")
    urllib.request.urlretrieve(
        'https://github.com/PINTO0309/MobileNet-SSD-RealSense/blob/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy'
        '.caffemodel?raw=true',
        'MobileNetSSD_deploy.caffemodel')
    urllib.request.urlretrieve(
        'https://raw.githubusercontent.com/nikmart/pi-object-detection/master/MobileNetSSD_deploy.prototxt.txt',
        'MobileNetSSD_deploy.prototxt.txt')
    net = cv2.dnn.readNetFromCaffe('MobileNetSSD_deploy.prototxt.txt', 'MobileNetSSD_deploy.caffemodel')
    return net

def FindObjectInImage(Img, What2Find, net):
    """
    Return a box over the desired object in an image
    :param Img: np.ndarray. An input image
    :param What2Find: list, usually with one string. The type(s) of object to look for in the image
    :param net: cv2.dnn_Net. The model used for running the object detection
    :return: Tuple that contains the box coordinates around the object and the preferred y location for the label,
             shifted down when the box is too close to the top of the image. If no relevant object is found
             then return ([-1], 0)
    """
    # initialize the list of class labels MobileNet SSD was trained to detect
    CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
               "sofa", "train", "tvmonitor"]
    ChosenClass = [CLASSES.index(x) for x in What2Find]  # find the indices of the relevant classes in CLASSES
    # Read and preprocess
    image = Img
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)
    # pass the blob through the network and obtain the detections and predictions
    net.setInput(blob)
    detections = net.forward()
    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the prediction
        confidence = detections[0, 0, i, 2]
        # extract the index of the class label from the `detections`
        idx = int(detections[0, 0, i, 1])
        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence, and make sure we have the right class
        if (confidence > 0.5) and (idx in ChosenClass):
            # compute the (x, y)-coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # choose a y for the label text that stays inside the image
            yForText = startY - 15 if startY - 15 > 15 else startY + 15
            return box.astype("int"), yForText
    # no sufficiently confident detection of the requested class was found
    return [-1], 0

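# A minimal sketch of running the detector on a single image. 'photo.jpg' is a
# hypothetical placeholder; the class name must be one of the CLASSES listed in
# FindObjectInImage().
def _demo_find_object():
    net = PrepareSSDMobile()  # downloads the Caffe model files on first use
    img = cv2.imread('photo.jpg')
    box, y_for_text = FindObjectInImage(img, ['person'], net)
    if len(box) > 1:
        print('Found a person at box: ' + str(box))
    else:
        print('No person found')
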
def CreateBoxAndLabelVideo(net, VideoPath, videoName, VideoOutputPath, Label, MainClass, Fontcolor=(204, 0, 0),
                           BoxColor=(0, 0, 255)):
    """
    Create a video with a box and label over the subject.
    :param net: cv2.dnn_Net. The model used for running the object detection.
    :param VideoPath: string. The path to the video.
    :param videoName: string. File name.
    :param VideoOutputPath: string. Output directory name.
    :param Label: string. What to use as the text over the detected object.
    :param MainClass: list, usually with one string. The type of object to look for in the image.
    :param Fontcolor: tuple. The font color (in OpenCV's BGR channel order).
    :param BoxColor: tuple. The box color (in OpenCV's BGR channel order).
    :return: Nothing.
    """
    # read the video
    cap = cv2.VideoCapture(VideoPath)
    # Get the parameters of the input video
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    # Create the output video using the parameters from the input video
    outFileName = Label + '_' + videoName
    out = cv2.VideoWriter(os.path.join(VideoOutputPath, outFileName), fourcc, fps, (width, height))
    FrameNum = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    FontColor = Fontcolor
    prevX = 0
    prevY = 0
    threshold = 0.1
    while True:
        # Capture the frames in the video
        ret, frame = cap.read()
        FrameNum = FrameNum + 1
        if frame is None:
            break
        # Search for the object in the frame
        (OutBox, yForText) = FindObjectInImage(frame, MainClass, net)
        # if OutBox contains more than one value then an object was found
        if len(OutBox) > 1:
            # Calculate the location of the box and label
            (startX, startY, endX, endY) = OutBox
            x = int((startX + endX) / 2)
            y = int((endY - startY) * 0.2)
            # To avoid the box and label jittering on every frame, only update their
            # location when the relative change in x or y exceeds the threshold
            diffX = abs(x - prevX) / (endX - startX)
            diffY = abs(y - prevY) / (endY - startY)
            if diffX < threshold:
                currX = prevX
            else:
                currX = x
            if diffY < threshold:
                currY = prevY
            else:
                currY = y
            # draw the rectangle and the text on the frame
            cv2.rectangle(frame, (startX, startY), (endX, endY), BoxColor, thickness=3)
            cv2.putText(frame, Label, (currX, currY), font, 2, FontColor, 3)
            prevY = currY
            prevX = currX
        out.write(frame)  # write the frame (annotated or not) to the output video
    # release the capture and writer objects
    cap.release()
    out.release()

def CrtBoxAndLblImg(image, Class2Find, net, Label, Fontcolor=(204, 0, 0), BoxColor=(0, 0, 255), FontScale=3,
                    Fnthickness=3):
    """
    Create a box and a label over the desired object
    :param image: np.ndarray. An input image
    :param Class2Find: list. The class we are looking for
    :param net: cv2.dnn_Net. The model used for running the object detection.
    :param Label: string. The label to put over the object
    :param Fontcolor: tuple. Font color (in OpenCV's BGR channel order)
    :param BoxColor: tuple. Box color (in OpenCV's BGR channel order)
    :param FontScale: int. How big the font should be
    :param Fnthickness: int. Font thickness
    :return: np.ndarray. The image with the box and the label (unchanged if no object was found)
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    (OutBox, yForText) = FindObjectInImage(image, Class2Find, net)
    if len(OutBox) > 1:
        # Calculate the location of the box and label
        (startX, startY, endX, endY) = OutBox
        fc = Fontcolor
        # find the location to put the box and label
        x = int((startX + endX) / 2)
        y = int((endY - startY) * 0.2)
        cv2.rectangle(image, (startX, startY), (endX, endY), BoxColor, thickness=3)
        cv2.putText(image, Label, (x, y), font, FontScale, fc, Fnthickness)
    return image

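# A minimal end-to-end sketch, runnable only when this file is executed directly.
# 'clip.mp4' and the output directory '.' are hypothetical placeholders; the
# ProcessPoolExecutor used in video_to_frames() also requires this guard on Windows.
if __name__ == '__main__':
    ssd_net = PrepareSSDMobile()
    CreateBoxAndLabelVideo(ssd_net, 'clip.mp4', 'clip.mp4', '.', 'person', ['person'])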