-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathtest.py
187 lines (150 loc) · 6.52 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import time
# relPath = "test/dir/dir2/volunteers/Lipspkr1/sa1/sa1.phn"
# topDir = relPath.split('/')[0]
# print topDir
# while not (topDir == "volunteers" or topDir == "lipspeakers"):
# relPath = '/'.join(relPath.split('/')[1:])
# topDir = relPath.split('/')[0]
#
# print relPath, topDir
# time.sleep(0.5)
# print relPath
import os
from utils.removeEmptyDirs import *
from fixTCDTIMITwavStructure import *
from getPhnFiles import *
# dstDir = os.path.expanduser("~/TCDTIMIT/TCDTIMITaudio")
#
# ## PHN: generate phoneme files
# print("EXTRACTING PHNS...")
# print("lipspeakers:")
# generatePHN("MLFfiles/lipspeaker_labelfiles.mlf", dstDir)
# print("volunteers:")
# generatePHN("MLFfiles/volunteer_labelfiles.mlf", dstDir)
import pdb
# some phonemes are skipped??
# videos = readMLFfile("MLFfiles/lipspeaker_labelfiles.mlf")
# for video in videos[0:10]:
# videoPath, phonemes = processVideoFile(video)
# pdb.set_trace()
from utils.helpFunctions import *
# get valid times, phonemes, frame numbers
def getValid(time_phonemes, framerate):  # frameRate = 29.97 for the TCDTimit database
    """Map (time, phoneme) entries to frame numbers, dropping repeated entries.

    Each entry's time is converted to a frame index with
    ``floor(time * framerate)``. An entry is kept only the first time its
    ``(frame, phoneme)`` combination occurs; later repeats are reported on
    stdout and skipped.

    Args:
        time_phonemes: iterable of indexable items; item[0] is the time
            (anything ``float()`` accepts) and item[1] is the phoneme label.
        framerate: multiplier used to turn a time into a frame index.

    Returns:
        A tuple ``(validTimes, validFrames, validPhonemes)`` of three parallel
        lists, in input order.
    """
    import math

    # The original code tested for the (frame, phoneme) tuple but stored only
    # the bare frame number, so the duplicate check could never match.
    # Store the same key that is tested.
    seen_framePhonemes = set()
    validFrames = []
    validPhonemes = []
    validTimes = []
    for time_phoneme in time_phonemes:
        # Named 'timestamp' so the module-level 'time' import is not shadowed.
        timestamp = float(time_phoneme[0])
        frame = int(math.floor(timestamp * framerate))
        phoneme = time_phoneme[1]
        key = (frame, phoneme)
        if key in seen_framePhonemes:
            print("frame_phoneme ", key, " already seen")
            continue
        seen_framePhonemes.add(key)
        validPhonemes.append(phoneme)
        validTimes.append(timestamp)
        validFrames.append(frame)
    return validTimes, validFrames, validPhonemes
# write file with phonemes and corresponding frame numbers. First column = frames. Second column = corresponding phonemes
def writePhonemesToFile2(videoName, speakerName, phonemes, targetDir):
    """Write the frame/phoneme table for one video as a text file and a .mat file.

    The text file is ``<targetDir>/<speakerName>_<videoName>_PHN.txt``; each
    line holds a frame number and its phoneme separated by a space. Lines are
    separated by "\\r\\n" and there is no trailing line break. The same data is
    also saved to ``<targetDir>/phonemeFrames.mat``.

    Args:
        videoName: video identifier used in the output file name.
        speakerName: speaker identifier used in the output file name.
        phonemes: (time, phoneme) entries, passed to ``getValid`` with a
            frame rate of 29.97.
        targetDir: directory to write into; created if it does not exist.
            It is also scanned (recursively) for ``*.jpg`` frame images.

    Returns:
        0, always.
    """
    validTimes, validFrames, validPhonemes = getValid(phonemes, 29.97)
    phonemeFile = ''.join([targetDir, os.sep, speakerName, "_", videoName, "_PHN.txt"])
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)

    # add 1 to the validFrames to fix the ffmpeg issue (starts at 1 instead of 0),
    # and never let a frame number drop below 1
    validFrames = [max(frame + 1, 1) for frame in validFrames]

    # check that no frames are larger than last frame extracted by extractAllFrames.
    # The frame number is read from the second "_"-separated field of each
    # .jpg file name. If no .jpg is found, 'highest' stays at 1.
    highest = 1
    for root, dirs, files in os.walk(targetDir):
        for fileName in files:
            print(fileName)
            name, ext = os.path.splitext(fileName)
            if ext != ".jpg":
                continue
            frame = int(name.split("_")[1])
            if frame > highest:
                highest = frame

    # Clamp any frame number beyond the last extracted frame.
    # (The pdb breakpoints that used to sit here were debugging leftovers
    # and stopped every run.)
    for i, frame in enumerate(validFrames):
        if frame > highest:
            print("FOUND HIGHER THAN HIGHEST:", frame)
            validFrames[i] = highest

    # write to text file; join() yields the same bytes as the old loop but
    # also copes with an empty phoneme list instead of raising IndexError
    lines = [' '.join(map(str, item)) for item in zip(validFrames, validPhonemes)]
    with open(phonemeFile, 'w') as thefile:
        thefile.write("\r\n".join(lines))

    # also write a mat file
    matPath = targetDir + os.sep + "phonemeFrames.mat"
    sio.savemat(matPath, {'validFrames': np.array(validFrames), 'validPhonemes': np.array(validPhonemes)})
    return 0
def deleteUnneededFiles(videoDir):
    """Delete every .jpg under videoDir whose frame number is not listed in the PHN file.

    The list of frames to keep is read from the first column of
    ``<videoDir>/<parent dir name>_<videoDir name>_PHN.txt``. A .jpg file's
    frame number is the second "_"-separated field of its name (without
    extension); it is compared to the listed values as a string.

    Args:
        videoDir: directory of one video; walked recursively.

    Returns:
        The number of files removed.
    """
    print("deleting files...")
    # read correct frames: first column of text file
    parentName = os.path.basename(os.path.dirname(videoDir))
    dirName = os.path.basename(videoDir)
    phonemeFile = videoDir + os.sep + parentName + "_" + dirName + "_PHN.txt"
    validFrames = set()  # set: membership is tested once per image below
    with open(phonemeFile) as inf:
        for line in inf:
            parts = line.split()  # split line into parts
            if len(parts) > 1:  # if at least 2 parts/columns
                validFrames.add(parts[0])  # keep column 1 (the frame number)

    # walk through the files, if a file doesn't contain '_validFrame', then remove it.
    nbRemoved = 0
    for root, dirs, files in os.walk(videoDir):
        files.sort(key=tryint)
        for f in files:
            name, ext = os.path.splitext(f)
            if ext != ".jpg":
                continue
            fnumber = name.split("_")[1]
            if fnumber not in validFrames:
                os.remove(os.path.join(root, f))
                nbRemoved += 1
    return nbRemoved
# ---------------------------------------------------------------------------
# Driver script: for each selected video, extract its frames, write the
# frame/phoneme table, delete frames that carry no phoneme, then extract,
# gray-scale and resize the face and mouth images.
# ---------------------------------------------------------------------------

# Face detector and landmark predictor handed to extractFacesMouths below.
detector = dlib.get_frontal_face_detector()
predictor_path = "./shape_predictor_68_face_landmarks.dat"
if not os.path.exists(predictor_path):
    print('Landmark predictor not found!')
predictor = dlib.shape_predictor(predictor_path)

# Root directory under which all extracted data is stored.
storageLocation = os.path.expanduser("~/TCDTIMIT/extracted")
if not os.path.exists(storageLocation):
    os.makedirs(storageLocation)

# Label files to process; the first entry returned by readMLFfile is used for each.
video1 = readMLFfile('/home/matthijs/Desktop/si2246.phn')[0]
video2 = readMLFfile('/home/matthijs/Desktop/sx180.phn')[0]
video3 = readMLFfile('/home/matthijs/Desktop/sx343.phn')[0]
video4 = readMLFfile('/home/matthijs/Desktop/si512.phn')[0]
video5 = readMLFfile('/home/matthijs/Desktop/si549.phn')[0]

videos = [video5]#,video2,video3]

for video in videos:
    videoPath, phonemes = processVideoFile(video)
    print(videoPath)
    if not os.path.exists(videoPath):
        print("The file ", videoPath, " does not exist.")
        # Nothing can be extracted from a missing video, so skip it instead
        # of running the rest of the pipeline on a nonexistent file.
        continue
    print(phonemes)

    videoName = os.path.splitext(os.path.basename(videoPath))[0]
    storeDir = fixStoreDirName(storageLocation, videoName, video[0])
    print("Extracting phonemes from ", videoPath, ", saving to: \t", storeDir)

    framerate = 29.97
    extractAllFrames(videoPath, videoName, storeDir, framerate, '1200:1000', '350:0')

    # write phonemes and frame numbers to file
    speakerName = os.path.basename(os.path.dirname(storeDir))
    writePhonemesToFile2(videoName, speakerName, phonemes, storeDir)

    # remove the frames that are not listed in the phoneme file
    videoDir = fixStoreDirName(storageLocation, videoName, video[0])
    a = deleteUnneededFiles(videoDir)

    # extract faces and mouths from the remaining frames
    sourceDir = fixStoreDirName(storageLocation, videoName, video[0])
    extractFacesMouths(sourceDir, storeDir, detector, predictor)

    # convert the extracted images to gray scale
    dirNames = ["faces", "mouths"]
    convertToGrayScale(sourceDir, dirNames)

    # resize the gray-scale images
    dirNames = ["mouths_gray", "faces_gray"]
    storeDir = fixStoreDirName(storageLocation, videoName, video[0])
    resizeImages(storeDir, dirNames, False, 120.0)

    # Reported per video so 'a' is always bound when it is printed.
    print("deleted: ", a)