#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 21 13:27:50 2021
Modules: MTCNN face detection,
YOLO (cars), to be implemented later
YOLO (phones and screens) to be implemented later
Version 2: only detects faces if YOLO has detected a person
@author: michellegreene
@author: peterriley
"""
import cv2
# import skvideo.io
import numpy as np
import argparse
from mtcnn.mtcnn import MTCNN
# construct the argument parser
# parser = argparse.ArgumentParser(description='Blur faces and other sensitive material in a video and save the result as a new video.')
# parser.add_argument("-i", "--input", help="Path to the input video.")
# parser.add_argument("-o", "--output", help="Path where the blurred video will be saved.")
# args = vars(parser.parse_args())
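# Example invocation if the argparse block above is re-enabled
# (paths below are placeholders):
#   python YOLO_privacy_script -i /path/to/input.mov -o /path/to/output.mov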
# establish video reader and writer
# hard-coded codec ('avc1' encodes H.264)
fourcc = 'avc1'
# open a video object
# vid = cv2.VideoCapture(args["input"])
inName = '/Users/abrahammieses/Desktop/video_folder/Crowd.mov'
outName = '/Users/abrahammieses/Desktop/video_folder/Crowding.mov'
# inName = '/Volumes/etna/Scholarship/Michelle Greene/Students/Shared/cats.mp4'
# outName = '/Volumes/etna/Scholarship/Michelle Greene/Students/Shared/cats2.mp4'
vid = cv2.VideoCapture(inName)
# get video properties
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
videoSize = (width, height)
fps = vid.get(cv2.CAP_PROP_FPS)
# define YOLO parameters
CONF_THRESH, NMS_THRESH = 0.1, 0.5  # NMS_THRESH is defined but not yet used
config = "./yolo_files/yolov3-tiny.cfg"
weights = "./yolo_files/yolov3-tiny.weights"
# Load the network
net = cv2.dnn.readNetFromDarknet(config, weights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
# Get the output layer names from YOLO
layers = net.getLayerNames()
# getUnconnectedOutLayers() returns a 1-D array in OpenCV >= 4.5.4 and an
# Nx1 array in older versions; flatten() handles both
output_layers = [layers[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
# create video writer
# outputFile = args["output"]
# writer = skvideo.io.FFmpegWriter(outputFile, outputdict={
# '-vcodec': 'libx264', #use the h.264 codec
# '-crf': '0', #set the constant rate factor to 0, which is lossless
# '-preset':'veryslow' # slower presets give better compression, in principle; for
# # other options see https://trac.ffmpeg.org/wiki/Encode/H.264
# })
writer = cv2.VideoWriter(outName, cv2.VideoWriter_fourcc(*fourcc), fps, videoSize, True)
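# Note: 'avc1' (H.264) requires an OpenCV build with an H.264 encoder available;
# if the writer fails to open, 'mp4v' is a commonly available fallback fourcc.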
# Helper functions
def rect_to_ellipse(x, y, width, height):
vert_axis = round(height / 2)
horz_axis = round(width / 2)
center_x = round(x + horz_axis)
center_y = round(y + vert_axis)
center_coordinates = (center_x, center_y)
axesLength = (horz_axis, vert_axis)
return center_coordinates, axesLength
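# e.g. rect_to_ellipse(10, 20, 100, 50) -> ((60, 45), (50, 25)):
# the ellipse is centered in the box, with semi-axes of half its width and height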
def blur(image):
blurred = cv2.medianBlur(image, 149) # 149 pixels (must be odd number)
return blurred
def logical_mask(image, scrambled, mask):
fg = cv2.bitwise_or(scrambled, scrambled, mask=mask)
mask = cv2.bitwise_not(mask)
bk = cv2.bitwise_or(image, image, mask=mask)
newImage = cv2.bitwise_or(fg, bk)
return newImage
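# In effect: output pixels come from `scrambled` where mask == 255 and from the
# original `image` where mask == 0.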
# instantiate the MTCNN detector once; re-creating it on every frame is very slow
detector = MTCNN()

def face_detect(image):
    # MTCNN expects RGB input; OpenCV frames are BGR
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # detect faces in the image
    faces = detector.detect_faces(rgb)
    return faces
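# detect_faces returns a list of dicts, each with a 'box' ([x, y, width, height]),
# a 'confidence' score, and facial 'keypoints'; only 'box' is used below.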
def yoloPerson(image):
    # scale pixels to [0, 1] (1/255 = 0.00392) and resize to YOLO's 416x416 input
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(output_layers)
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if class_id == 0 and confidence > CONF_THRESH:  # COCO class 0 = person
                return True
    return False
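# Each YOLO detection vector is [center_x, center_y, w, h, objectness, class
# scores...]; only the class scores are inspected here, since no person boxes
# need to be drawn.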
def resize(input_image, scale_percentage):
    # input_image is already a decoded frame (numpy array), so no imread is needed
    width = int(input_image.shape[1] * scale_percentage / 100)
    height = int(input_image.shape[0] * scale_percentage / 100)
    dim = (width, height)
    # INTER_AREA is the recommended interpolation for shrinking
    resized = cv2.resize(input_image, dim, interpolation=cv2.INTER_AREA)
    return resized
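# e.g. resize(frame, 20) returns a copy at 20% of the original width and height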
##### Main processing loop
count = 0
while vid.isOpened():
# read a frame
success, image = vid.read()
# assumes that any failure is the end of the video
if not success:
break
# counter for debugging
count += 1
if count % 10 == 0:
print('Processed: {} frames'.format(count))
# check to see if there is a person in the image
isPerson = yoloPerson(image)
if isPerson:
# initialize mask
mask = np.full((image.shape[0], image.shape[1]), 0, dtype=np.uint8)
        # downscale the frame to speed up face detection
        scale_percentage = 20
        image_resized = resize(image, scale_percentage)
        # create blurred version of entire image
        scrambled = blur(image)
        # detect faces in the downscaled image
        faceCoordinates = face_detect(image_resized)
# for each face, convert bounding box to ellipse
for j in range(len(faceCoordinates)): # j = which face in the frame
x, y, width, height = (faceCoordinates[j]['box'])
            # rescale x, y, w, h back to full-frame coordinates
x_rescaled = x / (scale_percentage / 100)
y_rescaled = y / (scale_percentage / 100)
width_rescaled = width / (scale_percentage / 100)
height_rescaled = height / (scale_percentage / 100)
# converts the bounding box to an ellipse via a custom function
ellipse = rect_to_ellipse(x_rescaled, y_rescaled, width_rescaled, height_rescaled)
# puts the ellipse onto the mask
cv2.ellipse(mask, ellipse[0], ellipse[1], 0, 0, 360, 255, -1)
# apply logical masking to each face
newImage = logical_mask(image, scrambled, mask)
else:
newImage = image
# write newImage as a frame
# writer.writeFrame(newImage[:,:,::-1]) #write the frame as RGB not BGR
writer.write(newImage)
# release the input and output objects
vid.release()
writer.release()
# writer.close()