Skip to content

Commit

Permalink
Merge pull request #53 from remotesensinginfo/sort_drone_images
Browse files Browse the repository at this point in the history
Added function to sort a set of drone images
  • Loading branch information
petebunting authored Jul 13, 2022
2 parents a861f0d + 47e4e2e commit 16e31d0
Show file tree
Hide file tree
Showing 3 changed files with 236 additions and 7 deletions.
8 changes: 8 additions & 0 deletions doc/python/source/rsgislib_tools_imagetools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,11 @@ Animate Images
-----------------
.. autofunction:: rsgislib.tools.imagetools.animate_img_set

EXIF Metadata
-----------------
.. autofunction:: rsgislib.tools.imagetools.list_exif_info

Sort Images
-----------------
.. autofunction:: rsgislib.tools.imagetools.split_photos_by_time

219 changes: 219 additions & 0 deletions python/rsgislib/tools/imagetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,95 @@
import rsgislib.tools.utils


class ImageTimeCluster(object):
    """
    A class which supports the split_photos_by_time function.

    A cluster is a closed time interval [start_time, end_time] together with
    the list of image files whose acquisition times fall within it.
    """

    def __init__(self, cluster_id=None, init_time=None):
        """
        :param cluster_id: unique id for the cluster
        :param init_time: is the initial time of the first image
        """
        self.cluster_id = cluster_id
        self.start_time = init_time
        self.end_time = init_time
        self.images = list()

    def within_cluster(self, date_time_obj, time_split_secs=60):
        """
        Test whether a date / time belongs to this cluster. The date / time
        belongs to the cluster if it is less than time_split_secs before the
        cluster start, less than time_split_secs after the cluster end, or
        anywhere in between.

        :param date_time_obj: the datetime.datetime object to test
        :param time_split_secs: the tolerance (seconds) around the cluster.
        :return: boolean; True if the date / time is within the cluster.
        """
        secs_before_start = (self.start_time - date_time_obj).total_seconds()
        secs_after_end = (date_time_obj - self.end_time).total_seconds()
        # Both values are negative for a time inside the cluster, so interior
        # times are accepted as well as times close to either end.
        return (secs_before_start < time_split_secs) and (
            secs_after_end < time_split_secs
        )

    def add_image_to_cluster(self, image_file, date_time_obj):
        """
        Add an image to the cluster, extending the cluster's time range
        if the image falls outside of it.

        :param image_file: the image file path to be added.
        :param date_time_obj: the datetime.datetime object for the image.
        """
        self.images.append(image_file)
        if self.start_time is None or date_time_obj < self.start_time:
            self.start_time = date_time_obj
        if self.end_time is None or date_time_obj > self.end_time:
            self.end_time = date_time_obj

    def does_cluster_overlap(self, cluster_obj, time_split_secs=60):
        """
        Test whether another cluster overlaps with this cluster, or is
        separated from it by less than time_split_secs.

        :param cluster_obj: the other ImageTimeCluster object.
        :param time_split_secs: the tolerance (seconds) between the clusters.
        :return: boolean; True if the clusters overlap.
        """
        # The gap between two intervals is the larger of the two
        # 'start minus other end' differences; it is negative when the
        # intervals intersect (including when one contains the other).
        gap_secs = max(
            (self.start_time - cluster_obj.end_time).total_seconds(),
            (cluster_obj.start_time - self.end_time).total_seconds(),
        )
        return gap_secs < time_split_secs

    def merge_cluster(self, cluster_obj):
        """
        Merge another cluster into this one; the images are appended and
        the time range is extended to cover both clusters.

        :param cluster_obj: the ImageTimeCluster object to merge into this one.
        """
        self.images = self.images + cluster_obj.images
        # Independent tests: the other cluster can extend both ends at once.
        if cluster_obj.start_time < self.start_time:
            self.start_time = cluster_obj.start_time
        if cluster_obj.end_time > self.end_time:
            self.end_time = cluster_obj.end_time

    def __str__(self):
        return "{}: {} - {} = {}".format(
            self.cluster_id, self.start_time, self.end_time, len(self.images)
        )

    __repr__ = __str__


def animate_img_set(
input_imgs: List[str],
output_img: str,
Expand Down Expand Up @@ -78,3 +167,133 @@ def animate_img_set(

if resize:
shutil.rmtree(lcl_tmp_dir)


def list_exif_info(input_img: str):
    """
    A function which lists the exif information from an input image
    using the PIL module. Each tag is printed as 'name: value'; where PIL
    has no name for a tag the numeric tag id is printed instead.

    :param input_img: the path to the input image
    """
    from PIL import Image
    from PIL.ExifTags import TAGS

    # The context manager closes the image even if reading the EXIF fails.
    with Image.open(input_img) as pil_image:
        pil_exif_data = pil_image.getexif()

        for tag_id, data in pil_exif_data.items():
            # get the tag name, instead of human unreadable tag id
            tag = TAGS.get(tag_id, tag_id)
            # decode bytes; binary payloads are not guaranteed to be valid
            # text so undecodable bytes are replaced rather than raising.
            if isinstance(data, bytes):
                data = data.decode(errors="replace")
            print(f"{tag:25}: {data}")


def split_photos_by_time(
    input_imgs,
    output_base_dir,
    time_split_secs=60,
    date_time_tag="DateTime",
    date_time_format="%Y:%m:%d %H:%M:%S",
    out_date_time_format="%Y%m%d_%H%M%S",
):
    """
    A function which splits a set of images (e.g., from drone surveys) by time.
    Where photos taken within the specified time window will be clustered together
    (default: 60 seconds). The photos are then copied into a new directory based on
    the start time of the cluster. Note, photos are copied and not moved from their
    current location. Images which do not have the date / time EXIF tag are
    skipped and a warning is printed.

    :param input_imgs: a list of input images (e.g., glob.glob("*.jpg"))
    :param output_base_dir: the output base directory where the output directories
                            will be created.
    :param time_split_secs: The time difference in seconds for a new cluster to be
                            created.
    :param date_time_tag: the EXIF tag for the date / time of the acquisition.
                          Default is 'DateTime' but this might be different for
                          different cameras and therefore you might need to use the
                          list_exif_info function to find the correct field.
    :param date_time_format: The python datetime.strptime format string for the
                             datetime provided in the EXIF metadata.
                             Default: %Y:%m:%d %H:%M:%S
    :param out_date_time_format: The python datetime.strftime format string for the
                                 used to create the output directory for the files.
                                 Note, this must be unique. Default: %Y%m%d_%H%M%S

    """
    from PIL import Image
    from PIL.ExifTags import TAGS
    import datetime
    import tqdm

    # Build a look up of image file -> acquisition date / time.
    img_dt_lut = dict()
    for img_file in input_imgs:
        # The context manager ensures the file handle is released per image.
        with Image.open(img_file) as pil_image:
            pil_exif_data = pil_image.getexif()
            for tag_id in pil_exif_data:
                # get the tag name, instead of human unreadable tag id
                tag = TAGS.get(tag_id, tag_id)
                if tag != date_time_tag:
                    continue
                data = pil_exif_data.get(tag_id)
                # decode bytes
                if isinstance(data, bytes):
                    data = data.decode()
                img_dt_lut[img_file] = datetime.datetime.strptime(
                    data, date_time_format
                )
                break
        if img_file not in img_dt_lut:
            print(
                "Warning: no '{}' EXIF tag found in {}; image skipped.".format(
                    date_time_tag, img_file
                )
            )

    # Process the images in chronological order so that each image only needs
    # to be compared with the most recent cluster: if it is not within
    # time_split_secs of that cluster it cannot belong to any earlier one.
    img_clusters = list()
    for img_file, date_time_obj in sorted(
        img_dt_lut.items(), key=lambda img_dt: img_dt[1]
    ):
        if img_clusters and img_clusters[-1].within_cluster(
            date_time_obj, time_split_secs
        ):
            img_clusters[-1].add_image_to_cluster(img_file, date_time_obj)
        else:
            img_cluster_obj = ImageTimeCluster(len(img_clusters), date_time_obj)
            img_cluster_obj.images.append(img_file)
            img_clusters.append(img_cluster_obj)

    for img_cluster in img_clusters:
        print("Copying Files for: {}".format(img_cluster))
        out_dir_name = img_cluster.start_time.strftime(out_date_time_format)
        out_dir_path = os.path.join(output_base_dir, out_dir_name)
        print("Output Directory: {}".format(out_dir_path))
        # Also creates output_base_dir if it does not yet exist.
        os.makedirs(out_dir_path, exist_ok=True)
        for img in tqdm.tqdm(img_cluster.images):
            shutil.copy(img, out_dir_path)
16 changes: 9 additions & 7 deletions python_tests/test_tools_ftptools.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
ftp_user_ref = "dlpuser"
ftp_pass_ref = "rNrKYTX9g7z3RgJRmxWuGHbeu"


@pytest.mark.skipif(True, reason="Skip test as timeout can be a problem for ci.")
def test_upload_ftp_file():
import rsgislib.tools.ftptools

Expand Down Expand Up @@ -63,10 +63,11 @@ def test_get_ftp_file_listings():
assert len(non_dirs_lst) > 0


@pytest.mark.skipif(
True,
reason="pysondb dependency not available and test currently skipped as timeout can be a problem for ci.",
)
#@pytest.mark.skipif(
# True,
# reason="pysondb dependency not available and test currently skipped as timeout can be a problem for ci.",
#)
@pytest.mark.skipif(True, reason="Skip test as timeout can be a problem for ci.")
def test_create_file_listings_db(tmp_path):
import rsgislib.tools.ftptools

Expand All @@ -93,7 +94,7 @@ def test_create_file_listings_db(tmp_path):

assert os.path.exists(out_db_file)


@pytest.mark.skipif(True, reason="Skip test as timeout can be a problem for ci.")
def test_download_ftp_file(tmp_path):
import rsgislib.tools.ftptools

Expand Down Expand Up @@ -121,7 +122,8 @@ def test_download_ftp_file(tmp_path):
assert os.path.exists(out_file)


@pytest.mark.skipif(PYCURL_DB_NOT_AVAIL, reason="pycurl dependency not available")
#@pytest.mark.skipif(PYCURL_DB_NOT_AVAIL, reason="pycurl dependency not available")
@pytest.mark.skipif(True, reason="Skip test as timeout can be a problem for ci.")
def test_download_curl_ftp_file(tmp_path):
import rsgislib.tools.ftptools

Expand Down

0 comments on commit 16e31d0

Please sign in to comment.