From abd152548ece43737d9463f6511e25539c0451a2 Mon Sep 17 00:00:00 2001
From: bwstuff
Date: Sun, 24 Feb 2019 20:02:00 +0100
Subject: [PATCH] #96 removed duplicate scripts

---
 data_loader/fg-net_data_loader.py             |  97 -------
 data_loader/fg-script.py                      |  94 -------
 data_loader/imdb_script.py                    | 242 ------------------
 .../lap-script-without-face-extraction.py     |  87 -------
 data_loader/lap-script.py                     |  92 -------
 5 files changed, 612 deletions(-)
 delete mode 100644 data_loader/fg-net_data_loader.py
 delete mode 100644 data_loader/fg-script.py
 delete mode 100644 data_loader/imdb_script.py
 delete mode 100644 data_loader/lap-script-without-face-extraction.py
 delete mode 100644 data_loader/lap-script.py

diff --git a/data_loader/fg-net_data_loader.py b/data_loader/fg-net_data_loader.py
deleted file mode 100644
index 36e4fab..0000000
--- a/data_loader/fg-net_data_loader.py
+++ /dev/null
@@ -1,97 +0,0 @@
-
-import cv2 as cv
-import numpy as np
-import os
-import tensorflow as tf
-
-def load_images_from_folder(folder):
-    images = []
-    for subfolder in os.listdir(folder):
-        images.append(os.path.join(folder, subfolder))
-    return images
-
-# folder to search for pictures in; it must contain only image files,
-# otherwise the age parsing below fails
-fp = "../data/fg-net_set/FGNET/images"
-cur_pad = os.path.normpath(fp)
-full_path = load_images_from_folder(cur_pad)
-
-print("-> image list read complete, creating data arrays")
-
-
-age = []
-for a in full_path:
-    # FG-NET file names look like "001A02.JPG" or "001A02a.JPG"; the digits
-    # after the "A" encode the age at which the photo was taken
-    b = os.path.basename(a).split('.')
-    c = b[0].split('A')
-    d = c[1].rstrip('ab')
-    age.append(d)
-
-I = {}
-for idx, val in enumerate(full_path):
-    try:
-        I[val] = {"age": age[idx]}
-    except IndexError:
-        I[val] = {"age": -1}
-
-    I[val]["img"] = cv.imread(val)
-    try:
-        I[val]["img"] = cv.resize(I[val]["img"], (224, 224))
-    except cv.error:
-        # unreadable image, drop the entry again
-        del I[val]
-
-
-print("-> data array creation completed, flushing into training-ready dataset")
-
-X = []
-Y = []
-for k, v in I.items():
-    X.append(v["img"])
-    Y.append(v["age"])
-print("-> train set ready")
-
-# convert BGR images to float32 RGB
-def load_image(img):
-    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
-    img = img.astype(np.float32)
-    return img
-
-
-# convert data to features (TensorFlow 1.x API)
-def _int64_feature(value):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
-
-def _bytes_feature(value):
-    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-
-# write data into a tfrecords file
-filename = 'fg-net.tfrecords'
-
-
-writer = tf.python_io.TFRecordWriter(filename)
-
-for i in range(len(X)):
-    img = load_image(X[i])
-
-    # choose the label you want to train on
-    label = int(Y[i])
-
-    feature = {'train/label': _int64_feature(label),
-               'train/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))}
-
-    example = tf.train.Example(features=tf.train.Features(feature=feature))
-    writer.write(example.SerializeToString())
-writer.close()
-
-print("-> created and wrote tfrecords file for selected dataset")
\ No newline at end of file
diff --git a/data_loader/fg-script.py b/data_loader/fg-script.py
deleted file mode 100644
index 3a6b35b..0000000
--- a/data_loader/fg-script.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import urllib.request
-import os
-import cv2 as cv
-import random
-
-face_cascade = cv.CascadeClassifier('haarcascade_frontalface_default.xml')
-
-
-def createFolder(directory):
-    try:
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-    except OSError:
-        print("Error: could not create directory.")
-
-def createClassificationFolders(directory):
-    createFolder(directory)
-    # one sub-folder per age label, 0-100
-    for x in range(101):
-        createFolder(directory + "/" + str(x))
-
-def load_images_from_folder(folder):
-    images = []
-    for subfolder in os.listdir(folder):
-        images.append(os.path.join(folder, subfolder))
-    return images
-
-if not os.path.exists("../data/FGNET.zip"):
-    print("Start downloading Zipfile")
-    urllib.request.urlretrieve("http://yanweifu.github.io/FG_NET_data/FGNET.zip", "../data/FGNET.zip")
-    print("Finished downloading Zipfile")
-else:
-    print("Zip already downloaded.")
-
-if not os.path.exists("../data/FGNET"):
-    print("Start unzipping")
-    import zipfile
-    with zipfile.ZipFile("../data/FGNET.zip", 'r') as zip_ref:
-        zip_ref.extractall("../data/FGNET")
-    print("Finished unzipping")
-else:
-    print("Already unzipped")
-
-
-age = []
-full_path = []
-full_path2 = load_images_from_folder("../data/FGNET/FGNET/images")
-delta = 5
-
-for a in full_path2:
-    # FG-NET file names look like "001A02.JPG"; the digits after the "A"
-    # encode the age at which the photo was taken
-    name = os.path.basename(a)
-    full_path.append(name)
-    b = name.split('.')
-    c = b[0].split('A')
-    d = c[1].rstrip('ab')
-    age.append(d)
-
-counter_training = 0
-counter_valid = 0
-counter_test = 0
-print("Printing Images to Classification Folders")
-
-# random split: ~50% train, ~25% validation, ~25% test; the crops are
-# written into the shared LAP class folders
-for idx, val in enumerate(full_path):
-    img = cv.imread("../data/FGNET/FGNET/images/" + val)
-    faces = face_cascade.detectMultiScale(img, 1.8, 5)
-    for (x, y, w, h) in faces:
-        # clamp the margin so slicing never wraps around with negative indices
-        cip = img[max(0, y-delta):y+h+delta, max(0, x-delta):x+w+delta].copy()
-        try:
-            cip = cv.resize(cip, (224, 224))
-            i = random.random()
-            if i > 0.5:
-                counter_training += 1
-                cv.imwrite("../data/LAP/Train/" + str(int(age[idx])) + "/" + val, cip)
-                print("Written: " + "../data/LAP/Train/" + str(int(age[idx])) + "/" + val)
-            elif i < 0.25:
-                counter_valid += 1
-                cv.imwrite("../data/LAP/Validation/" + str(int(age[idx])) + "/" + val, cip)
-                print("Written: " + "../data/LAP/Validation/" + str(int(age[idx])) + "/" + val)
-            else:
-                counter_test += 1
-                cv.imwrite("../data/LAP/Test/" + str(int(age[idx])) + "/" + val, cip)
-                print("Written: " + "../data/LAP/Test/" + str(int(age[idx])) + "/" + val)
-        except cv.error:
-            print("Resize failed")
-
-with open("../data/img_counts_fg.txt", "w+") as f:
-    f.write("Train_img_count = " + str(counter_training) + "\n")
-    f.write("Validation_img_count = " + str(counter_valid) + "\n")
-    f.write("Test_img_count = " + str(counter_test) + "\n")
-
-print("FINISHED")
\ No newline at end of file
diff --git a/data_loader/imdb_script.py b/data_loader/imdb_script.py
deleted file mode 100644
index a347c63..0000000
--- a/data_loader/imdb_script.py
+++ /dev/null
@@ -1,242 +0,0 @@
-import urllib.request
-import os
-import math
-import cv2 as cv
-from datetime import datetime, timedelta
-from sklearn.model_selection import train_test_split
-
-
-face_cascade = cv.CascadeClassifier('haarcascade_frontalface_default.xml')
-
-
-def createFolder(directory):
-    try:
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-    except OSError:
-        print("Error: could not create directory.")
-
-def createClassificationFolders(directory):
-    createFolder(directory)
-    # one sub-folder per age label, 0-100
-    for x in range(101):
-        createFolder(directory + "/" + str(x))
-
-def load_images_from_folder(folder):
-    images = []
-    for subfolder in os.listdir(folder):
-        images.append(os.path.join(folder, subfolder))
-    return images
-
-if not os.path.exists("../data/imdb_crop.tar"):
-    print("Start downloading Tarfile")
-    urllib.request.urlretrieve("https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/static/imdb_crop.tar", "../data/imdb_crop.tar")
-    print("Finished downloading Tarfile")
-else:
-    print("Tarfile already downloaded.")
-
-if not os.path.exists("../data/imdb_crop"):
-    print("Start unpacking")
-    import tarfile
-    tar = tarfile.open("../data/imdb_crop.tar")
-    tar.extractall(path="../data")
-    tar.close()
-    print("Finished unpacking")
-else:
-    print("Already unpacked")
-
-with open('imdb_metadata.csv', 'r') as file:
-    lines = [line.rstrip('\n') for line in file]
-    # dob, full_path, gender, photo_taken, face_location
-    dob = lines[0]
-    full_path = lines[2]
-    gender = lines[4]
-    photo_taken = lines[6]
-    face_location = lines[8]
-
-    dob = dob.lstrip('[').rstrip(']')
-    dob = dob.split(',')
-    dob = [float(x) for x in dob]
-    print(dob[0])
-
-    full_path = full_path.split(' ')
-    print(full_path[0])
-
-    gender = gender.lstrip('[').rstrip(']')
-    gender = gender.split(',')
-    gender = [float(x) for x in gender]
-    print(gender[0])
-
-    photo_taken = photo_taken.lstrip('[').rstrip(']')
-    photo_taken = photo_taken.split(',')
-    photo_taken = [str(int(float(x))) for x in photo_taken]
-    print(photo_taken[0])
-
-    face_location = face_location.split(' ')
-    face_location = [x.lstrip('[[').rstrip(']]').split(',') for x in face_location]
-    print(face_location[0])
-
-# maps file path to a dict containing the rest of the image information
-I = {}
-for idx, val in enumerate(full_path):
-    try:
-        # dob is a MATLAB datenum (fractional days since year 0); keep the
-        # fractional part when converting and shift by the 366-day offset
-        I[val] = {"dob_m": dob[idx],
-                  "dob": datetime.fromordinal(int(dob[idx])) + timedelta(days=dob[idx] % 1) - timedelta(days=366),
-                  "gender": gender[idx], "photo_taken": datetime.strptime(photo_taken[idx], "%Y"),
-                  "face_location": face_location[idx]}
-        I[val]["age"] = (I[val]["photo_taken"] - I[val]["dob"]).days / 365.2425
-    except (ValueError, OverflowError):
-        # invalid date of birth, mark the age as unknown
-        I[val] = {"dob_m": dob[idx],
-                  "dob": -1,
-                  "gender": gender[idx], "photo_taken": datetime.strptime(photo_taken[idx], "%Y"),
-                  "face_location": face_location[idx]}
-        I[val]["age"] = -1
-    I[val]["img_path"] = "../data/imdb_crop/" + val
-
-    if not os.path.isfile("../data/imdb_crop" + "/" + val):
-        del I[val]
-        continue
-
-print("-> data array creation completed, flushing into training-ready dataset")
-
-X = []
-Y_age = []
-Y_gender = []
-for k, v in I.items():
-    X.append(v["img_path"])
-    Y_age.append(v["age"])
-    Y_gender.append(v["gender"])
-print("-> training set ready for splitting")
-print(X[0])
-print(Y_age[0])
-print(Y_gender[0])
-
-# fraction of images used for the training set
-size_training = 0.75
-
-'''
-    X_train = images for training set
-    Y_age_train = age labels for training set
-    Y_gender_train = gender labels for training set
-    X_val = images for validation set
-    Y_age_val = age labels for validation set
-    Y_gender_val = gender labels for validation set
-    X_test = images for test set
-    Y_age_test = age labels for test set
-    Y_gender_test = gender labels for test set
-'''
-# shuffle defaults to True; stratify defaults to None, so the split
-# is not stratified
-X_train, X_tmp, Y_age_train, Y_age_tmp, Y_gender_train, Y_gender_tmp = train_test_split(
-    X, Y_age, Y_gender, train_size=size_training, random_state=1
-)
-
-# of the remaining 25%, one fifth goes to validation and the rest to test
-size_val = 0.2
-
-X_val, X_test, Y_age_val, Y_age_test, Y_gender_val, Y_gender_test = train_test_split(
-    X_tmp, Y_age_tmp, Y_gender_tmp, train_size=size_val, random_state=1
-)
-
-print("-> dataset split")
-
-def write_tfrecord(datasetX, datasetY, t):
-    # despite its name, this writes cropped JPEGs into class folders,
-    # not TFRecord files
-    counter = 0
-    print("-> starting export of " + t)
-
-    for i in range(len(datasetX)):
-        img = cv.imread(datasetX[i])
-        if img is None:
-            continue
-        label = datasetY[i]
-        label = int(label) if not math.isnan(label) else -1
-        if label < 0 or label > 100:
-            continue
-
-        faces = face_cascade.detectMultiScale(img, 1.8, 5)
-        for (x, y, w, h) in faces:
-            # clamp the margin so slicing never wraps around with negative indices
-            cip = img[max(0, y-5):y+h+5, max(0, x-5):x+w+5].copy()
-            try:
-                cip = cv.resize(cip, (224, 224))
-                counter += 1
-                if not os.path.exists("../data/IMDB/" + t + "/" + str(label)):
-                    os.makedirs("../data/IMDB/" + t + "/" + str(label))
-                cv.imwrite("../data/IMDB/" + t + "/" + str(label) + "/" + str(counter) + "_" + datasetX[i].split("/")[-1], cip)
-            except cv.error:
-                print("Resize failed")
-
-
-# sanity check: print the first sample if it ended up in the training split
-if X[0] in X_train:
-    i0 = X_train.index(X[0])
-    print(X_train[i0])
-    print(Y_age_train[i0])
-    print(Y_gender_train[i0])
-
-write_tfrecord(X_train, Y_age_train, "age/train")
-write_tfrecord(X_train, Y_gender_train, "gender/train")
-write_tfrecord(X_val, Y_age_val, "age/val")
-write_tfrecord(X_val, Y_gender_val, "gender/val")
-write_tfrecord(X_test, Y_age_test, "age/test")
-write_tfrecord(X_test, Y_gender_test, "gender/test")
-
-print("-> created and wrote the cropped image folders for all dataset splits")
diff --git a/data_loader/lap-script-without-face-extraction.py b/data_loader/lap-script-without-face-extraction.py
deleted file mode 100644
index 58fd71a..0000000
--- a/data_loader/lap-script-without-face-extraction.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import urllib.request
-import os
-import csv
-import cv2 as cv
-
-def createFolder(directory):
-    try:
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-    except OSError:
-        print("Error: could not create directory.")
-
-def createClassificationFolders(directory):
-    createFolder(directory)
-    # one sub-folder per age label, 0-100
-    for x in range(101):
-        createFolder(directory + "/" + str(x))
-
-if not os.path.exists("../data/appa-real-release.zip"):
-    print("Start downloading Zipfile")
-    urllib.request.urlretrieve("http://158.109.8.102/AppaRealAge/appa-real-release.zip", "../data/appa-real-release.zip")
-    print("Finished downloading Zipfile")
-else:
-    print("Zip already downloaded.")
-
-if not os.path.exists("../data/appa-real-release"):
-    print("Start unzipping")
-    import zipfile
-    with zipfile.ZipFile("../data/appa-real-release.zip", 'r') as zip_ref:
-        zip_ref.extractall("../data/appa-real-release")
-    print("Finished unzipping")
-else:
-    print("Already unzipped")
-
-
-folder_directory = "../data/"
-
-if not os.path.exists(folder_directory + "LAP"):
-    print("Start creating Classification folders")
-    createFolder(folder_directory + "LAP")
-    createClassificationFolders(folder_directory + "LAP/Train")
-    createClassificationFolders(folder_directory + "LAP/Valid")
-    createClassificationFolders(folder_directory + "LAP/Test")
-    print("Finished creating Classification folders")
-else:
-    print("Classification folders already present")
-
-
-def readAndPrintDataImages(type_set_csv, type_set_target):
-    age = []
-    full_path = []
-    counter = 0
-    with open("../data/appa-real-release/appa-real-release/gt_avg_" + type_set_csv + ".csv", "r") as f:
-        reader = csv.reader(f, delimiter=",")
-        for i, line in enumerate(reader):
-            # skip the header row; column 0 holds the file name and
-            # column 4 the real age
-            if i != 0:
-                age.append(float(line[4]))
-                full_path.append(line[0])
-
-    for idx, val in enumerate(full_path):
-        img = cv.imread("../data/appa-real-release/appa-real-release/" + type_set_csv + "/" + val)
-        try:
-            cip = cv.resize(img, (224, 224))
-            counter += 1
-            cv.imwrite("../data/LAP/" + type_set_target + "/" + str(int(age[idx])) + "/" + val, cip)
-            print("Written: " + "../data/LAP/" + type_set_target + "/" + str(int(age[idx])) + "/" + val)
-        except cv.error:
-            print("Resize failed")
-
-    return counter
-
-print("Printing Images to Classification Folders")
-train_img_count = readAndPrintDataImages("train", "Train")
-valid_img_count = readAndPrintDataImages("valid", "Valid")
-test_img_count = readAndPrintDataImages("test", "Test")
-
-with open("../data/img_counts.txt", "w+") as f:
-    f.write("Train_img_count = " + str(train_img_count) + "\n")
-    f.write("Validation_img_count = " + str(valid_img_count) + "\n")
-    f.write("Test_img_count = " + str(test_img_count) + "\n")
-
-print("FINISHED")
diff --git a/data_loader/lap-script.py b/data_loader/lap-script.py
deleted file mode 100644
index 3507347..0000000
--- a/data_loader/lap-script.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import urllib.request
-import os
-import csv
-import cv2 as cv
-
-face_cascade = cv.CascadeClassifier('haarcascade_frontalface_default.xml')
-
-
-def createFolder(directory):
-    try:
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-    except OSError:
-        print("Error: could not create directory.")
-
-def createClassificationFolders(directory):
-    createFolder(directory)
-    # one sub-folder per age label, 0-100
-    for x in range(101):
-        createFolder(directory + "/" + str(x))
-
-if not os.path.exists("../data/appa-real-release.zip"):
-    print("Start downloading Zipfile")
-    urllib.request.urlretrieve("http://158.109.8.102/AppaRealAge/appa-real-release.zip", "../data/appa-real-release.zip")
-    print("Finished downloading Zipfile")
-else:
-    print("Zip already downloaded.")
-
-if not os.path.exists("../data/appa-real-release"):
-    print("Start unzipping")
-    import zipfile
-    with zipfile.ZipFile("../data/appa-real-release.zip", 'r') as zip_ref:
-        zip_ref.extractall("../data/appa-real-release")
-    print("Finished unzipping")
-else:
-    print("Already unzipped")
-
-
-folder_directory = "../data/"
-
-if not os.path.exists(folder_directory + "LAP"):
-    print("Start creating Classification folders")
-    createFolder(folder_directory + "LAP")
-    createClassificationFolders(folder_directory + "LAP/Train")
-    createClassificationFolders(folder_directory + "LAP/Valid")
-    createClassificationFolders(folder_directory + "LAP/Test")
-    print("Finished creating Classification folders")
-else:
-    print("Classification folders already present")
-
-
-def readAndPrintDataImages(type_set_csv, type_set_target):
-    age = []
-    full_path = []
-    delta = 5
-    counter = 0
-    with open("../data/appa-real-release/appa-real-release/gt_avg_" + type_set_csv + ".csv", "r") as f:
-        reader = csv.reader(f, delimiter=",")
-        for i, line in enumerate(reader):
-            # skip the header row; column 0 holds the file name and
-            # column 4 the real age
-            if i != 0:
-                age.append(float(line[4]))
-                full_path.append(line[0])
-
-    for idx, val in enumerate(full_path):
-        img = cv.imread("../data/appa-real-release/appa-real-release/" + type_set_csv + "/" + val)
-        faces = face_cascade.detectMultiScale(img, 1.8, 5)
-        for (x, y, w, h) in faces:
-            # clamp the margin so slicing never wraps around with negative indices
-            cip = img[max(0, y-delta):y+h+delta, max(0, x-delta):x+w+delta].copy()
-            try:
-                cip = cv.resize(cip, (224, 224))
-                counter += 1
-                cv.imwrite("../data/LAP/" + type_set_target + "/" + str(int(age[idx])) + "/" + val, cip)
-                print("Written: " + "../data/LAP/" + type_set_target + "/" + str(int(age[idx])) + "/" + val)
-            except cv.error:
-                print("Resize failed")
-    return counter
-
-print("Printing Images to Classification Folders")
-train_img_count = readAndPrintDataImages("train", "Train")
-valid_img_count = readAndPrintDataImages("valid", "Valid")
-test_img_count = readAndPrintDataImages("test", "Test")
-
-with open("../data/img_counts.txt", "w+") as f:
-    f.write("Train_img_count = " + str(train_img_count) + "\n")
-    f.write("Validation_img_count = " + str(valid_img_count) + "\n")
-    f.write("Test_img_count = " + str(test_img_count) + "\n")
-
-print("FINISHED")
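
Note: all five removed scripts duplicated the same folder-creation and face-crop-and-resize logic, which is what made them safe to delete in favour of a single loader. Below is a minimal sketch of what such a shared helper could look like; the module name (data_loader/common.py), function names, and paths are illustrative assumptions and are not part of this PR.

# data_loader/common.py -- hypothetical consolidated helpers (illustrative only)
import os

import cv2 as cv


def create_classification_folders(root, num_classes=101):
    # one sub-folder per age label, 0-100
    for x in range(num_classes):
        os.makedirs(os.path.join(root, str(x)), exist_ok=True)


def crop_faces(img, cascade, delta=5, size=(224, 224)):
    # detect faces and yield resized crops with a small margin; indices are
    # clamped so the margin never produces negative (wrapping) slices
    for (x, y, w, h) in cascade.detectMultiScale(img, 1.8, 5):
        crop = img[max(0, y - delta):y + h + delta,
                   max(0, x - delta):x + w + delta]
        try:
            yield cv.resize(crop, size)
        except cv.error:
            continue  # degenerate detection, skip it


if __name__ == "__main__":
    # usage sketch: crop one image into the age-0 training folder
    cascade = cv.CascadeClassifier('haarcascade_frontalface_default.xml')
    create_classification_folders("../data/LAP/Train")
    img = cv.imread("example.jpg")  # placeholder input path
    if img is not None:
        for i, face in enumerate(crop_faces(img, cascade)):
            cv.imwrite("../data/LAP/Train/0/face_%d.jpg" % i, face)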