-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimls_barcode_reader.py
210 lines (175 loc) · 7.77 KB
/
imls_barcode_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import cv2
import time
import string
import numpy as np
import pandas as pd
import shutil
from pyzbar import pyzbar
from pathlib import Path
from datetime import datetime
# Add location of images folder here.
# This path is handed to main() by the call at the bottom of this file.
IMAGES_FOLDER = '/Volumes/GoogleDrive/Shared drives/LACMIP Imaging/IMLS Type Specimens/TO_PROCESS'
# Variables used for processing images for barcode reading.
# BRIGHTNESS and CONTRAST are the defaults of Image.apply_brightness_contrast();
# a value of 0 makes that method skip the corresponding adjustment.
BRIGHTNESS = 0
CONTRAST = 0
# Default `amount` argument of Image.sharpen() (weight of the unsharp mask).
SHARPEN = 2.0
# Used for the cv2 threshold binary function -- valid values are between 0-255
THRESHOLD = 127
class Date:
    """Utility to add properly formatted dates for dir/file names.

    Attributes:
        date: Local date at construction time, formatted 'YYYY-MM-DD'.
        month: Local month at construction time, formatted 'YYYY-MM'.
    """

    def __init__(self):
        # Read the clock once so `date` and `month` always describe the same
        # instant, even when the object is created right at a month boundary.
        now = datetime.now()
        self.date = now.strftime('%Y-%m-%d')
        self.month = now.strftime('%Y-%m')

    def get_timestamp(self):
        """Return the current local date and time formatted with '%x %X'."""
        return datetime.now().strftime('%x %X')
class Ledger:
    """Keeps track of letter order for filename and returns properly formatted filename.

    Attributes:
        ledger: dict mapping a catalog number to how many filenames have been
            issued for it so far.
        translator: dict mapping 1..26 to 'a'..'z'.
    """

    @staticmethod
    def number_to_letter():
        """Returns a dict of all letters and their numerical order (1 -> 'a', ..., 26 -> 'z')."""
        return dict(enumerate(string.ascii_lowercase, start=1))

    def __init__(self):
        self.ledger = {}
        self.translator = self.number_to_letter()

    def __add_cat_number(self, cat_number):
        """Increment the usage count of `cat_number`, starting at 1."""
        self.ledger[cat_number] = self.ledger.get(cat_number, 0) + 1

    def __return_letter(self, cat_number):
        """Register one more use of `cat_number` and return its letter suffix.

        Counts 1..26 give 'a'..'z'. Larger counts continue spreadsheet-style
        ('aa', 'ab', ...) instead of raising KeyError on the 27th image.
        """
        self.__add_cat_number(cat_number)
        remaining = self.ledger[cat_number]
        suffix = ''
        # Bijective base-26: peel off the least significant letter each pass.
        while remaining > 0:
            remaining, index = divmod(remaining - 1, 26)
            suffix = self.translator[index + 1] + suffix
        return suffix

    def __format_cat_number(self, number):
        """Replace dots with dashes so the catalog number is filename friendly."""
        return number.replace('.', '-')

    def return_filename(self, cat_number, taxon):
        """Return the next unused filename for `cat_number`.

        Args:
            cat_number: Catalog number string as decoded from the barcode.
            taxon: Taxon string to embed in the name, or a falsy value to omit it.

        Returns:
            'LACMIP_<cat>_<taxon>_<letter>.jpg', or 'LACMIP_<cat>_<letter>.jpg'
            when `taxon` is falsy. Each call advances the letter for that
            catalog number.
        """
        letter = self.__return_letter(cat_number)
        cat_num = self.__format_cat_number(cat_number)
        if taxon:
            return f'LACMIP_{cat_num}_{taxon}_{letter}.jpg'
        return f'LACMIP_{cat_num}_{letter}.jpg'
class FilePaths:
    """Class to contain all file paths.

    Attributes:
        images: Directory that holds the images to process.
        records: CSV file ('PhotosRecord.csv' inside `images`) with one row
            of statistics per run.
        successes_parent / failures_parent: 'successes' and 'failures'
            directories inside `images`.
        successes / failures: Month-specific sub-directories of the above.
    """
    # Created once, when the class body is executed; supplies the month label
    # used in the sub-directory names.
    day = Date()

    def __init__(self, fp=None):
        """Resolve every path and create the output directories.

        Args:
            fp: Images directory. When falsy, the parent of the current
                working directory is used.
        """
        self.images = Path(fp) if fp else Path.cwd().parent
        # Filepath for the csv where results of script are recorded
        self.records = self.images.joinpath('PhotosRecord.csv')
        # Get paths for successes/failure directories
        self.successes_parent = self.images.joinpath('successes')
        self.failures_parent = self.images.joinpath('failures')
        self.successes = self.successes_parent.joinpath(f'{self.day.month}_successes')
        self.failures = self.failures_parent.joinpath(f'{self.day.month}_failures')
        # Make success/failure directories if they don't already exist;
        # parents=True also creates the 'successes'/'failures' parents.
        for directory in (self.successes, self.failures):
            directory.mkdir(parents=True, exist_ok=True)

    def save_to_records(self, record_dict):
        """Append one row of run statistics to the records csv.

        Args:
            record_dict: Mapping of column name to scalar value; must contain
                the key "TOTAL". Nothing is written when "TOTAL" is 0.
        """
        # If no photos were processed, do not write to the csv file
        if record_dict["TOTAL"] == 0:
            return
        record_df = pd.DataFrame(record_dict, index=[0])
        if self.records.exists():
            previous_df = pd.read_csv(str(self.records))
            # DataFrame.append was removed in pandas 2.0; concat is the
            # supported way to add rows.
            records_df = pd.concat([previous_df, record_df], ignore_index=True)
        else:
            records_df = record_df
        records_df.to_csv(str(self.records), index=False)
class Taxonomy:
    """Looks up scientific names by catalog number in 'taxonomy.csv'.

    Attributes:
        df: DataFrame holding the 'catalogNumber' and 'scientificName'
            columns of the csv.
    """
    # Only these columns are loaded from the csv.
    fields = ['catalogNumber', 'scientificName']

    def __init__(self):
        # Path is relative to the current working directory.
        self.df = pd.read_csv('taxonomy.csv', usecols=self.fields)

    def return_taxon(self, cat_num):
        """Return the scientific name for `cat_num` with spaces turned into underscores.

        Args:
            cat_num: Value to match exactly against the 'catalogNumber' column.

        Returns:
            The first matching scientific name with ' ' replaced by '_', or
            None when the catalog number is absent or its name is missing
            (NaN/None) or empty.
        """
        names = self.df.loc[self.df.catalogNumber == cat_num, 'scientificName']
        if names.empty:
            return None
        scientific_name = names.iloc[0]
        # Blank csv cells are read as NaN, which is truthy and has no
        # .replace(); check for it explicitly.
        if pd.isna(scientific_name) or not scientific_name:
            return None
        return str(scientific_name).replace(' ', '_')
class Image:
    """Reads in an image and provides processing with cv2.

    Attributes:
        image: The file loaded with cv2.imread flag 0 (grayscale). Every
            processing method overwrites this attribute with its result.
        original_image: The file loaded with cv2.imread's default flags;
            never modified by this class.
    """

    def __init__(self, fp):
        """Load `fp` twice: once as the working copy, once untouched."""
        self.image = cv2.imread(str(fp), 0)
        self.original_image = cv2.imread(str(fp))

    def sharpen(self, kernel_size=(5, 5), sigma=1.0, amount=SHARPEN, threshold=1):
        """Sharpen `self.image` in place, using an unsharp mask.

        Args:
            kernel_size: Gaussian kernel size passed to cv2.GaussianBlur.
            sigma: Gaussian sigma passed to cv2.GaussianBlur.
            amount: Strength of the sharpening; 0 leaves the image unchanged.
            threshold: Pixels whose absolute difference from the blurred
                image is below this value keep their original value. Values
                <= 0 disable the mask.
        """
        blurred = cv2.GaussianBlur(self.image, kernel_size, sigma)
        sharpened = float(amount + 1) * self.image - float(amount) * blurred
        # Clamp to the valid 8-bit range before converting back to uint8.
        sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)
        if threshold > 0:
            # Subtract in a signed type: uint8 - uint8 wraps around, which
            # turns small negative differences into values near 255.
            difference = self.image.astype(np.int16) - blurred.astype(np.int16)
            low_contrast_mask = np.absolute(difference) < threshold
            np.copyto(sharpened, self.image, where=low_contrast_mask)
        self.image = sharpened

    def apply_brightness_contrast(self, brightness=BRIGHTNESS, contrast=CONTRAST):
        """Adjust brightness and/or contrast of `self.image` in place.

        Args:
            brightness: Positive values raise the shadows, negative values
                lower the highlights; 0 skips the brightness step.
            contrast: 0 skips the contrast step.
        """
        if brightness != 0:
            if brightness > 0:
                shadow = brightness
                highlight = 255
            else:
                shadow = 0
                highlight = 255 + brightness
            alpha_b = (highlight - shadow) / 255
            gamma_b = shadow
            # The second source has weight 0; addWeighted is used only to
            # compute alpha * image + gamma with saturation.
            buf = cv2.addWeighted(self.image, alpha_b, self.image, 0, gamma_b)
        else:
            buf = self.image.copy()
        if contrast != 0:
            f = 131 * (contrast + 127) / (127 * (131 - contrast))
            alpha_c = f
            gamma_c = 127 * (1 - f)
            buf = cv2.addWeighted(buf, alpha_c, buf, 0, gamma_c)
        self.image = buf

    def binarize(self, threshold=THRESHOLD):
        """Binarize `self.image` in place with cv2.THRESH_BINARY.

        Args:
            threshold: Cut-off passed to cv2.threshold (0-255); 255 is used
                as the maximum value.
        """
        # Use the argument; the module-level constant is only its default.
        _, self.image = cv2.threshold(self.image, threshold, 255, cv2.THRESH_BINARY)

    def read_barcode(self):
        """Pre-process `self.image` and return whatever pyzbar decodes from it.

        Runs brightness/contrast, binarize and sharpen (in that order) with
        their default arguments, so `self.image` is modified by this call.
        """
        self.apply_brightness_contrast()
        self.binarize()
        self.sharpen()
        barcodes = pyzbar.decode(self.image)
        return barcodes
def main(dir=None):
    """Iterate over directory and detect all barcodes in .jpg images and print the barcode data.

    Every JPEG directly inside the images directory is decoded. Images with a
    usable barcode are written (unprocessed) to the month's successes
    directory under a generated name; the others are written (processed) to
    the failures directory. The source file is deleted once its copy has been
    written. A summary is printed and appended to the records csv.

    Args:
        dir: Images directory handed to FilePaths; None lets FilePaths pick
            its default.
    """
    start = time.time()
    ledger = Ledger()
    paths = FilePaths(fp=dir)
    taxonomy = Taxonomy()
    day = Date()
    stats = {'SUCCESSES': 0, 'FAILURES': 0, 'TOTAL': 0, 'DATE': day.get_timestamp()}

    # Snapshot the listing first: files are deleted from this directory while
    # we work. Sorting makes the letter suffixes reproducible. The suffix test
    # is case-insensitive so '.JPEG' is picked up as well.
    candidates = sorted(
        fp for fp in paths.images.iterdir()
        if fp.is_file() and fp.suffix.lower() in ('.jpg', '.jpeg')
    )
    for fp in candidates:
        image = Image(fp)
        if image.image is None or image.original_image is None:
            # cv2.imread returns None for unreadable files; keep the file and
            # carry on with the rest of the batch.
            print(f'{fp.stem}: could not be read as an image; file left in place')
            continue
        barcodes = image.read_barcode()
        barcode_values = [b.data.decode('utf-8') for b in barcodes]
        is_label_photo = 'LABELS' in barcode_values
        if is_label_photo:
            barcode_values.remove('LABELS')

        # A photo that only shows the 'LABELS' marker has no catalog number
        # left, so it is handled as a failure rather than raising IndexError.
        if barcode_values:
            if is_label_photo:
                taxon = None
                barcode_value = f'{barcode_values[0]}_labels'
            else:
                taxon = taxonomy.return_taxon(f'LACMIP {barcode_values[0]}')
                barcode_value = barcode_values[0]
            print(f'{fp.stem}: {barcode_value}')
            stats['SUCCESSES'] += 1
            stats["TOTAL"] += 1
            save_path = paths.successes.joinpath(ledger.return_filename(barcode_value, taxon))
            # The ledger starts from 'a' on every run; skip names already on
            # disk so images from an earlier run are not overwritten.
            while save_path.exists():
                save_path = paths.successes.joinpath(ledger.return_filename(barcode_value, taxon))
            saved = cv2.imwrite(str(save_path), image.original_image)
        else:
            print(f'{fp.stem}: Null')
            stats['FAILURES'] += 1
            stats["TOTAL"] += 1
            save_path = paths.failures.joinpath(f'{fp.stem}_{day.date}_FAILURE.jpg')
            saved = cv2.imwrite(str(save_path), image.image)

        if saved:
            # Delete original image file only after its copy was written.
            fp.unlink()
        else:
            print(f'{fp.stem}: could not write {save_path}; original left in place')

    print(f'{stats["SUCCESSES"]} successes \n{stats["FAILURES"]} failures.'
          f'\nThis function took {round(time.time()-start)} seconds')
    paths.save_to_records(stats)
# Entry point: process every image in IMAGES_FOLDER.
# NOTE(review): there is no `if __name__ == '__main__':` guard, so this call
# also runs whenever the module is imported.
main(dir=IMAGES_FOLDER)