import argparse
import os
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

import cv2
import pandas as pd
import torch
import tqdm
from mivolo.data.data_reader import PictureInfo, get_all_files
from mivolo.modeling.yolo_detector import Detector, PersonAndFaceResult
from preparation_utils import assign_persons, associate_persons, get_additional_bboxes, get_main_face, save_annotations


def read_fairface_annotations(annotations_files):
    annotations_per_image = {}
    cols = ["file", "age", "gender"]

    for file in annotations_files:
        split_name = os.path.basename(file).split(".")[0].split("_")[-1]
        df = pd.read_csv(file, sep=",", usecols=cols)
        for index, row in df.iterrows():
            aligned_face_path = row["file"]
            age, gender = row["age"], row["gender"]
            # M or F
            gender = gender[0].upper() if isinstance(gender, str) else None
            age = age.replace("-", ";") if isinstance(age, str) else None
            annotations_per_image[aligned_face_path] = {"age": age, "gender": gender, "split": split_name}
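    # Resulting entries map the relative image path to its labels, e.g. (illustrative values):
    #   "train/1.jpg" -> {"age": "50;59", "gender": "M", "split": "train"}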
    return annotations_per_image


def read_data(images_dir, annotations_files) -> Tuple[List[PictureInfo], List[PictureInfo]]:
    dataset_pictures_train: List[PictureInfo] = []
    dataset_pictures_val: List[PictureInfo] = []

    all_images = get_all_files(images_dir)
    annotations_per_file = read_fairface_annotations(annotations_files)

    SPLIT_TYPE = Dict[str, Dict[str, int]]
    splits_stat_per_gender: SPLIT_TYPE = defaultdict(lambda: defaultdict(int))
    splits_stat_per_ages: SPLIT_TYPE = defaultdict(lambda: defaultdict(int))

    age_map = {"more than 70": "70;120"}

    for image_path in all_images:
        relative_path = image_path.replace(f"{images_dir}/", "")
        annot = annotations_per_file[relative_path]
        split = annot["split"]

        age, gender = annot["age"], annot["gender"]
        age = age_map[age] if age in age_map else age

        splits_stat_per_gender[split][gender] += 1
        splits_stat_per_ages[split][age] += 1

        if split == "train":
            dataset_pictures_train.append(PictureInfo(image_path, age, gender))
        elif split == "val":
            dataset_pictures_val.append(PictureInfo(image_path, age, gender))
        else:
            raise ValueError(f"Unknown split name: {split}")

    print(f"Found train/val images: {len(dataset_pictures_train)}/{len(dataset_pictures_val)}")
    for split, stat_per_gender in splits_stat_per_gender.items():
        print(f"\n{split} Per gender images: {stat_per_gender}")
    for split, stat_per_ages in splits_stat_per_ages.items():
        ages = list(stat_per_ages.keys())
        print(f"\n{split} Per ages categories ({len(ages)} cats):")
        ages = sorted(ages, key=lambda x: int(x.split(";")[0].strip()))
        for age in ages:
            print(f"Age: {age} Count: {stat_per_ages[age]}")

    return dataset_pictures_train, dataset_pictures_val


def find_persons_on_image(image_info, main_bbox, detected_objects, other_faces_inds, device):
    # find person_ind for each face (main + other_faces)
    all_faces: List[torch.Tensor] = [torch.tensor(main_bbox).to(device)] + [
        detected_objects.get_bbox_by_ind(ind) for ind in other_faces_inds
    ]
    faces_persons_map, other_persons_inds = associate_persons(all_faces, detected_objects)

    additional_faces: List[PictureInfo] = get_additional_bboxes(
        detected_objects, other_faces_inds, image_info.image_path
    )

    # set person bboxes for all faces (main + additional_faces)
    assign_persons([image_info] + additional_faces, faces_persons_map, detected_objects)
    if faces_persons_map[0] is not None:
        assert all(coord != -1 for coord in image_info.person_bbox)

    additional_persons: List[PictureInfo] = get_additional_bboxes(
        detected_objects, other_persons_inds, image_info.image_path, is_person=True
    )
    return additional_faces, additional_persons


def main(faces_dir: str, annotations: List[str], data_dir: str, detector_cfg: Optional[dict] = None):
    """
    Generate an annotation file with columns:
    ["img_name", "age", "gender",
     "face_x0", "face_y0", "face_x1", "face_y1",
     "person_x0", "person_y0", "person_x1", "person_y1"]

    If detector_cfg is set, each face bbox will be refined with the detector
    and a person bbox will be assigned to each face.
    Other detected faces and persons will also be written to the output file
    (needed for further preprocessing).
    """
    # output directory for annotation files
    out_dir = os.path.join(data_dir, "annotations")
    os.makedirs(out_dir, exist_ok=True)

    # load annotations
    dataset_pictures_train, dataset_pictures_val = read_data(faces_dir, annotations)

    for images, split_name in zip([dataset_pictures_train, dataset_pictures_val], ["train", "val"]):
        if detector_cfg:
            # detect faces with yolo detector
            faces_not_found, images_with_other_faces = 0, 0
            other_faces: List[PictureInfo] = []

            detector_weights, device = detector_cfg["weights"], detector_cfg["device"]
            detector = Detector(detector_weights, device, verbose=False, conf_thresh=0.1, iou_thresh=0.2)

            for image_info in tqdm.tqdm(images, desc=f"Detecting {split_name} faces: "):
                cv_im = cv2.imread(image_info.image_path)
                im_h, im_w = cv_im.shape[:2]
                # all images are 448x448 with 125 px padding
                coarse_bbox = [125, 125, im_w - 125, im_h - 125]  # xyxy

                detected_objects: PersonAndFaceResult = detector.predict(cv_im)
                main_bbox, other_faces_inds = get_main_face(detected_objects, coarse_bbox)
                if len(other_faces_inds):
                    images_with_other_faces += 1

                if main_bbox is None:
                    # no face was detected: fall back to the coarse bbox
                    faces_not_found += 1
                    main_bbox = coarse_bbox
                image_info.bbox = main_bbox

                additional_faces, additional_persons = find_persons_on_image(
                    image_info, main_bbox, detected_objects, other_faces_inds, device
                )
                # add all additional faces
                other_faces.extend(additional_faces)
                # add persons with empty faces
                other_faces.extend(additional_persons)

            print(f"Faces not detected: {faces_not_found}/{len(images)}")
            print(f"Images with other faces: {images_with_other_faces}/{len(images)}")
            print(f"Other bboxes (faces/persons): {len(other_faces)}")
            images = images + other_faces
        else:
            for image_info in tqdm.tqdm(images, desc="Collect face bboxes: "):
                cv_im = cv2.imread(image_info.image_path)
                im_h, im_w = cv_im.shape[:2]
                # all images are 448x448 with 125 px padding
                image_info.bbox = [125, 125, im_w - 125, im_h - 125]  # xyxy

        save_annotations(images, faces_dir, out_file=os.path.join(out_dir, f"{split_name}_annotations.csv"))


def get_parser():
    parser = argparse.ArgumentParser(description="FairFace")
    parser.add_argument(
        "--dataset_path",
        default="data/FairFace",
        type=str,
        required=True,
        help="path to folder with fairface-img-margin125-trainval/ and fairface_label_{split}.csv",
    )
    parser.add_argument(
        "--detector_weights", default=None, type=str, required=False, help="path to face and person detector weights"
    )
    parser.add_argument("--device", default="cuda:0", type=str, required=False, help="device to run the detector on")
    return parser


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()

    data_dir = args.dataset_path
    if data_dir[-1] == "/":
        data_dir = data_dir[:-1]
    faces_dir = os.path.join(data_dir, "fairface-img-margin125-trainval")

    annotations = [
        os.path.join(data_dir, "fairface_label_train.csv"),
        os.path.join(data_dir, "fairface_label_val.csv"),
    ]

    detector_cfg: Optional[Dict[str, str]] = None
    if args.detector_weights is not None:
        # run the detector on the device requested via --device
        detector_cfg = {"weights": args.detector_weights, "device": args.device}

    main(faces_dir, annotations, data_dir, detector_cfg)
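

# Example invocation (illustrative paths; --detector_weights is optional):
#   python prepare_fairface.py --dataset_path data/FairFace \
#       --detector_weights models/yolov8x_person_face.pt --device cuda:0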