-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_data.py
54 lines (43 loc) · 1.33 KB
/
preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
import shutil
import random
# Split the ISIC images into train / validation / test folders, with one
# sub-folder per class ("benign" / "malignant"), driven by ISIC/labels.csv.
# Each row is assigned to a split by a uniform random draw:
#   draw < 0.8        -> train
#   0.8 <= draw < 0.9 -> validation
#   otherwise         -> test
seed = 1
random.seed(seed)  # fixed seed so the split is reproducible across runs

directory = "ISIC/images/"
train = "data/train/"
test = "data/test/"
validation = "data/validation/"

# exist_ok=True lets the script be re-run without crashing on the
# directories created by a previous run.
for split_dir in (train, test, validation):
    for class_dir in ("benign/", "malignant/"):
        os.makedirs(split_dir + class_dir, exist_ok=True)

test_examples = train_examples = validation_examples = 0

# The context manager guarantees the labels file is closed, even if a copy
# fails part-way through.
with open("ISIC/labels.csv") as labels_file:
    next(labels_file, None)  # skip the header row (no-op on an empty file)
    for line in labels_file:
        # Ignore blank lines (e.g. a trailing newline at end of file); they
        # carry no label and would otherwise raise IndexError below.
        if not line.strip():
            continue

        split_line = line.split(",")
        img_file = split_line[0]
        benign_malign = split_line[1]

        # Exactly one draw per data row, in file order, so the assignment of
        # images to splits is fully determined by the seed.
        random_num = random.random()
        if random_num < 0.8:
            location = train
            train_examples += 1
        elif random_num < 0.9:
            location = validation
            validation_examples += 1
        else:
            location = test
            test_examples += 1

        # The label is parsed via float() first, so values such as "0.0" and
        # "1.0" (and any surrounding whitespace/newline) are accepted.
        label = int(float(benign_malign))
        if label == 0:
            class_dir = "benign/"
        elif label == 1:
            class_dir = "malignant/"
        else:
            # Any other label: the row has already been counted above but no
            # image is copied.
            continue

        shutil.copy(
            directory + img_file + ".jpg",
            location + class_dir + img_file + ".jpg",
        )

print(f"Number of training examples {train_examples}")
print(f"Number of test examples {test_examples}")
print(f"Number of validation examples {validation_examples}")