diff --git a/.github/workflows/distribute.yml b/.github/workflows/distribute.yml index 034c4ac0..4ac1e70e 100644 --- a/.github/workflows/distribute.yml +++ b/.github/workflows/distribute.yml @@ -29,26 +29,7 @@ jobs: python -m build - name: upload to PyPI.org - uses: pypa/gh-action-pypi-publish@v1.8.10 + uses: pypa/gh-action-pypi-publish@v1.8.12 with: user: __token__ password: ${{ secrets.TOOLS_PYPI_PAK }} - - verify-distribution: - runs-on: ubuntu-latest - needs: - - call-version-info-workflow - - distribute - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@v4 - - - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: environment.yml - - - name: Ensure asf_tools v${{ needs.call-version-info-workflow.outputs.version }}} is pip installable - run: | - python -m pip install asf_tools==${{ needs.call-version-info-workflow.outputs.version_tag }} diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml index 3c258e53..723ac27b 100644 --- a/.github/workflows/static-analysis.yml +++ b/.github/workflows/static-analysis.yml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.10" diff --git a/CHANGELOG.md b/CHANGELOG.md index 281f3d5d..dd1d0757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/) and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.0] + +## Added +* Scripts and entrypoints for generating our global watermasking dataset added to `watermasking`. + ## [0.6.0] diff --git a/environment.yml b/environment.yml index 7646b3e7..1f49abd9 100644 --- a/environment.yml +++ b/environment.yml @@ -23,7 +23,10 @@ dependencies: - boto3 - fiona - gdal>=3.7 + - geopandas - numpy + - osmium-tool + - pyogrio - pysheds>=0.3 - rasterio - scikit-fuzzy diff --git a/pyproject.toml b/pyproject.toml index 4a5f0959..893453cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,14 +25,15 @@ dependencies = [ "astropy", "fiona", "gdal>=3.3", + "geopandas", "numpy", + "pyogrio", "pysheds>=0.3", "rasterio", "scikit-fuzzy", "scikit-image", "scipy", - "shapely", - "tqdm", + "shapely" ] dynamic = ["version"] @@ -41,6 +42,9 @@ make_composite = "asf_tools.composite:main" water_map = "asf_tools.hydrosar.water_map:main" calculate_hand = "asf_tools.hydrosar.hand.calculate:main" flood_map = "asf_tools.hydrosar.flood_map:main" +generate_osm_dataset = "asf_tools.watermasking.generate_osm_tiles:main" +generate_worldcover_dataset = "asf_tools.watermasking.generate_worldcover_tiles:main" +fill_missing_tiles = "asf_tools.watermasking.fill_missing_tiles:main" [project.entry-points.hyp3] water_map = "asf_tools.hydrosar.water_map:hyp3" diff --git a/src/asf_tools/watermasking/README.MD b/src/asf_tools/watermasking/README.MD new file mode 100644 index 00000000..dde40451 --- /dev/null +++ b/src/asf_tools/watermasking/README.MD @@ -0,0 +1,18 @@ +These scripts are for creating a global (or regional) water mask dataset based off of OpenStreetMaps, and optionally augmented by ESA WorldCover. + +For the OSM water mask dataset, follow these steps to replicate our dataset: + +1. Download the "Latest Weekly Planet PBF File" file from here: "https://planet.openstreetmap.org/". +2. Download the WGS84 water polygons shapefile from: "https://osmdata.openstreetmap.de/data/water-polygons.html". +3. The files should be unzipped and you should have something like `planet.osm.pbf` or `planet.pbf` and `water_polygons.shp` (and the support files for `water_polygons.shp`). +4. Run ```generate_osm_dataset --planet-file-path [path-to-planet.pbf] --ocean-polygons-path [path-to-water-polygons.shp] --lat-begin -85 --lat-end 85 --lon-begin -180 --lon-end 180 --tile-width 5 --tile-height 5``` +5. Run ```fill_missing_tiles --fill-value 0 --lat-begin -90 --lat-end -85 --lon-begin -180 --lon-end 180 --tile-width 5 --tile-height 5``` +6. Run ```fill_missing_tiles --fill-value 1 --lat-begin 85 --lat-end 90 --lon-begin -180 --lon-end 180 --tile-width 5 --tile-height 5``` + +For the WorldCover water mask dataset, follow these steps: + +1. Download the portions of the dataset for the areas you would like to cover from here: "https://worldcover2020.esa.int/downloader" +2. Extract the contents into a folder. Note, if you download multiple portions of the dataset, extract them all into the same folder. +3. Run ```generate_worldcover_dataset --worldcover-tiles-dir [path-to-worldcover-data] --lat-begin 55 --lat-end 80 --lon-begin -180 --lon-end 180 --tile-width 5 --tile-height 5``` + +Note that we only use WorldCover data over Alaska, Canada, and Russia for our dataset. diff --git a/src/asf_tools/watermasking/fill_missing_tiles.py b/src/asf_tools/watermasking/fill_missing_tiles.py new file mode 100644 index 00000000..ea9b64a2 --- /dev/null +++ b/src/asf_tools/watermasking/fill_missing_tiles.py @@ -0,0 +1,75 @@ +import argparse +import os +import subprocess + +import numpy as np +from osgeo import gdal, osr + +from asf_tools.watermasking.utils import lat_lon_to_tile_string + + +gdal.UseExceptions() + + +def main(): + + parser = argparse.ArgumentParser( + prog='fill_missing_tiles.py', + description='Script for creating filled tifs in areas with missing tiles.' + ) + + parser.add_argument('--fill-value', help='The value to fill the data array with.', default=0) + parser.add_argument('--lat-begin', help='The minimum latitude of the dataset in EPSG:4326.', default=-85) + parser.add_argument('--lat-end', help='The maximum latitude of the dataset in EPSG:4326.', default=85) + parser.add_argument('--lon-begin', help='The minimum longitude of the dataset in EPSG:4326.', default=-180) + parser.add_argument('--lon-end', help='The maximum longitude of the dataset in EPSG:4326.', default=180) + parser.add_argument('--tile-width', help='The desired width of the tile in degrees.', default=5) + parser.add_argument('--tile-height', help='The desired height of the tile in degrees.', default=5) + + args = parser.parse_args() + + fill_value = int(args.fill_value) + lat_begin = int(args.lat_begin) + lat_end = int(args.lat_end) + lon_begin = int(args.lon_begin) + lon_end = int(args.lon_end) + tile_width = int(args.tile_width) + tile_height = int(args.tile_height) + + lat_range = range(lat_begin, lat_end, tile_height) + lon_range = range(lon_begin, lon_end, tile_width) + + for lat in lat_range: + for lon in lon_range: + + tile = lat_lon_to_tile_string(lat, lon, is_worldcover=False, postfix='') + tile_tif = 'tiles/' + tile + '.tif' + tile_cog = 'tiles/cogs/' + tile + '.tif' + + print(f'Processing: {tile}') + + xmin, ymin = lon, lat + pixel_size_x = 0.00009009009 + pixel_size_y = 0.00009009009 + + # All images in the dataset should be this size. + data = np.empty((55500, 55500)) + data.fill(fill_value) + + driver = gdal.GetDriverByName('GTiff') + dst_ds = driver.Create(tile_tif, xsize=data.shape[0], ysize=data.shape[1], bands=1, eType=gdal.GDT_Byte) + dst_ds.SetGeoTransform([xmin, pixel_size_x, 0, ymin, 0, pixel_size_y]) + srs = osr.SpatialReference() + srs.ImportFromEPSG(4326) + dst_ds.SetProjection(srs.ExportToWkt()) + dst_band = dst_ds.GetRasterBand(1) + dst_band.WriteArray(data) + del dst_ds + + command = f'gdal_translate -of COG -co NUM_THREADS=all_cpus {tile_tif} {tile_cog}'.split(' ') + subprocess.run(command) + os.remove(tile_tif) + + +if __name__ == '__main__': + main() diff --git a/src/asf_tools/watermasking/generate_osm_tiles.py b/src/asf_tools/watermasking/generate_osm_tiles.py new file mode 100644 index 00000000..4b934900 --- /dev/null +++ b/src/asf_tools/watermasking/generate_osm_tiles.py @@ -0,0 +1,253 @@ +import argparse +import os +import subprocess +import time + +import geopandas as gpd +from osgeo import gdal + +from asf_tools.watermasking.utils import lat_lon_to_tile_string, remove_temp_files, setup_directories + + +gdal.UseExceptions() + +INTERIOR_TILE_DIR = 'interior_tiles/' +OCEAN_TILE_DIR = 'ocean_tiles/' +FINISHED_TILE_DIR = 'tiles/' +GDAL_OPTIONS = ['COMPRESS=LZW', 'NUM_THREADS=all_cpus'] + + +def process_pbf(planet_file: str, output_file: str): + """Process the global PBF file so that it only contains water features. + + Args: + planet_file: The path to the OSM Planet PBF file. + output_file: The desired path of the processed PBF file. + """ + + natural_file = 'planet_natural.pbf' + waterways_file = 'planet_waterways.pbf' + reservoirs_file = 'planet_reservoirs.pbf' + + natural_water_command = f'osmium tags-filter -o {natural_file} {planet_file} wr/natural=water'.split(' ') + waterways_command = f'osmium tags-filter -o {waterways_file} {planet_file} waterway="*"'.split(' ') + reservoirs_command = f'osmium tags-filter -o {reservoirs_file} {planet_file} landuse=reservoir'.split(' ') + merge_command = f'osmium merge {natural_file} {waterways_file} {reservoirs_file} -o {output_file}'.split(' ') + + subprocess.run(natural_water_command) + subprocess.run(waterways_command) + subprocess.run(reservoirs_command) + subprocess.run(merge_command) + + +def process_ocean_tiles(ocean_polygons_path, lat, lon, tile_width_deg, tile_height_deg, output_dir): + """Process and crop OSM ocean polygons into a tif tile. + + Args: + ocean_polygons_path: The path to the global ocean polygons file from OSM. + lat: The minimum latitude of the tile. + lon: The minimum longitude of the tile. + tile_width_deg: The width of the tile in degrees. + tile_height_deg: The height of the tile in degrees. + """ + + tile = lat_lon_to_tile_string(lat, lon, is_worldcover=False, postfix='') + tile_tif = output_dir + tile + '.tif' + + xmin, xmax, ymin, ymax = lon, lon+tile_width_deg, lat, lat+tile_height_deg + pixel_size_x = 0.00009009009 + pixel_size_y = 0.00009009009 + + clipped_polygons_path = tile + '.shp' + command = f'ogr2ogr -clipsrc {xmin} {ymin} {xmax} {ymax} {clipped_polygons_path} {ocean_polygons_path}'.split(' ') + subprocess.run(command) + + gdal.Rasterize( + tile_tif, + clipped_polygons_path, + xRes=pixel_size_x, + yRes=pixel_size_y, + burnValues=1, + outputBounds=[xmin, ymin, xmax, ymax], + outputType=gdal.GDT_Byte, + creationOptions=GDAL_OPTIONS + ) + + temp_files = [tile + '.dbf', tile + '.cpg', tile + '.prj', tile + '.shx', tile + '.shp'] + remove_temp_files(temp_files) + + +def extract_water(water_file, lat, lon, tile_width_deg, tile_height_deg, interior_tile_dir): + """Rasterize a water tile from the processed global PBF file. + + Args: + water_file: The path to the processed global PBF file. + lat: The minimum latitude of the tile. + lon: The minimum longitude of the tile. + tile_width_deg: The desired width of the tile in degrees. + tile_height_deg: The desired height of the tile in degrees. + """ + + tile = lat_lon_to_tile_string(lat, lon, is_worldcover=False, postfix='') + tile_pbf = tile + '.osm.pbf' + tile_tif = interior_tile_dir + tile + '.tif' + tile_shp = tile + '.shp' + tile_geojson = tile + '.geojson' + + # Extract tile from the main pbf, then convert it to a tif. + bbox = f'--bbox {lon},{lat},{lon+tile_width_deg},{lat+tile_height_deg}' + extract_command = f'osmium extract -s smart -S tags=natural=water {bbox} {water_file} -o {tile_pbf}'.split(' ') + export_command = f'osmium export --geometry-types=polygon {tile_pbf} -o {tile_geojson}'.split(' ') + subprocess.run(extract_command) + subprocess.run(export_command) + + # Islands and Islets can be members of the water features, so they must be removed. + water_gdf = gpd.read_file(tile_geojson, engine='pyogrio') + try: + water_gdf = water_gdf.drop(water_gdf[water_gdf['place'] == 'island'].index) + water_gdf = water_gdf.drop(water_gdf[water_gdf['place'] == 'islet'].index) + except KeyError: + # When there are no islands to remove, an AttributeError should throw, but we don't care about it. + pass + water_gdf.to_file(tile_shp, mode='w', engine='pyogrio') + water_gdf = None + + xmin, xmax, ymin, ymax = lon, lon+tile_width_deg, lat, lat+tile_height_deg + pixel_size_x = 0.00009009009 + pixel_size_y = 0.00009009009 + + gdal.Rasterize( + tile_tif, + tile_shp, + xRes=pixel_size_x, + yRes=pixel_size_y, + burnValues=1, + outputBounds=[xmin, ymin, xmax, ymax], + outputType=gdal.GDT_Byte, + creationOptions=GDAL_OPTIONS + ) + + temp_files = [tile + '.dbf', tile + '.cpg', tile + '.prj', tile + '.shx', tile_shp, tile_pbf, tile_geojson] + remove_temp_files(temp_files) + + +def merge_interior_and_ocean(internal_tile_dir, ocean_tile_dir, finished_tile_dir, translate_to_cog: bool = False): + """Merge the interior water tiles and ocean water tiles. + + Args: + interior_tile_dir: The path to the directory containing the interior water tiles. + ocean_tile_dir: The path to the directory containing the ocean water tiles. + merged_tile_dir: The path to the directory containing the merged water tiles. + """ + index = 0 + num_tiles = len([f for f in os.listdir(internal_tile_dir) if f.endswith('tif')]) - 1 + for filename in os.listdir(internal_tile_dir): + if filename.endswith('.tif'): + start_time = time.time() + + internal_tile = internal_tile_dir + filename + external_tile = ocean_tile_dir + filename + output_tile = finished_tile_dir + filename + command = [ + 'gdal_calc.py', + '-A', + internal_tile, + '-B', + external_tile, + '--format', + 'GTiff', + '--outfile', + output_tile, + '--calc', + '"logical_or(A, B)"' + ] + subprocess.run(command) + + if translate_to_cog: + cogs_dir = finished_tile_dir + 'cogs/' + try: + os.mkdir(cogs_dir) + except FileExistsError: + pass + out_file = cogs_dir + filename + translate_string = 'gdal_translate -ot Byte -of COG -co NUM_THREADS=all_cpus' + command = f'{translate_string} {output_tile} {out_file}'.split(' ') + subprocess.run(command) + os.remove(output_tile) + + end_time = time.time() + total_time = end_time - start_time + + print(f'Elapsed Time: {total_time}(s)') + print(f'Completed {index} of {num_tiles}') + + index += 1 + + +def main(): + + parser = argparse.ArgumentParser( + prog='generate_osm_tiles.py', + description='Main script for creating a tiled watermask dataset from OSM data.' + ) + + parser.add_argument('--planet-file-path', help='The path to the global planet.pbf file.') + parser.add_argument('--ocean-polygons-path', help='The path to the global OSM ocean polygons.') + parser.add_argument('--lat-begin', help='The minimum latitude of the dataset in EPSG:4326.', default=-85) + parser.add_argument('--lat-end', help='The maximum latitude of the dataset in EPSG:4326.', default=85) + parser.add_argument('--lon-begin', help='The minimum longitude of the dataset in EPSG:4326.', default=-180) + parser.add_argument('--lon-end', help='The maximum longitude of the dataset in EPSG:4326.', default=180) + parser.add_argument('--tile-width', help='The desired width of the tile in degrees.', default=5) + parser.add_argument('--tile-height', help='The desired height of the tile in degrees.', default=5) + + args = parser.parse_args() + + lat_begin = int(args.lat_begin) + lat_end = int(args.lat_end) + lon_begin = int(args.lon_begin) + lon_end = int(args.lon_end) + tile_width = int(args.tile_width) + tile_height = int(args.tile_height) + + setup_directories([INTERIOR_TILE_DIR, OCEAN_TILE_DIR, FINISHED_TILE_DIR]) + + print('Extracting water from planet file...') + processed_pbf_path = 'planet_processed.pbf' + process_pbf(args.planet_file_path, processed_pbf_path) + + print('Processing tiles...') + lat_range = range(lat_begin, lat_end, tile_height) + lon_range = range(lon_begin, lon_end, tile_width) + num_tiles = len(lat_range) * len(lon_range) - 1 + index = 0 + for lat in lat_range: + for lon in lon_range: + tile_name = lat_lon_to_tile_string(lat, lon, is_worldcover=False) + start_time = time.time() + extract_water( + processed_pbf_path, + lat, + lon, + tile_width, + tile_height, + interior_tile_dir=INTERIOR_TILE_DIR + ) + process_ocean_tiles( + args.ocean_polygons_path, + lat, + lon, + tile_width, + tile_height, + output_dir=OCEAN_TILE_DIR + ) + end_time = time.time() + total_time = end_time - start_time + print(f'Finished initial creation of {tile_name} in {total_time}(s). {index} of {num_tiles}') + index += 1 + + print('Merging processed tiles...') + merge_interior_and_ocean(INTERIOR_TILE_DIR, OCEAN_TILE_DIR, FINISHED_TILE_DIR, translate_to_cog=True) + + +if __name__ == '__main__': + main() diff --git a/src/asf_tools/watermasking/generate_worldcover_tiles.py b/src/asf_tools/watermasking/generate_worldcover_tiles.py new file mode 100644 index 00000000..508f8883 --- /dev/null +++ b/src/asf_tools/watermasking/generate_worldcover_tiles.py @@ -0,0 +1,294 @@ +import argparse +import os +import time +from pathlib import Path + +import numpy as np +from osgeo import gdal + +from asf_tools.watermasking.utils import lat_lon_to_tile_string, merge_tiles, remove_temp_files, setup_directories + + +gdal.UseExceptions() + +PREPROCESSED_TILE_DIR = 'worldcover_tiles_preprocessed/' +UNCROPPED_TILE_DIR = 'worldcover_tiles_uncropped/' +CROPPED_TILE_DIR = 'worldcover_tiles/' +FILENAME_POSTFIX = '.tif' +WORLDCOVER_TILE_SIZE = 3 +GDAL_OPTIONS = ['COMPRESS=LZW', 'TILED=YES', 'NUM_THREADS=all_cpus'] + + +def tile_preprocessing(tile_dir, min_lat, max_lat, min_lon, max_lon): + """The worldcover tiles have lots of unnecessary classes, these need to be removed first. + Note: make a back-up copy of this directory. + + Args: + tile_dir: The directory containing all of the worldcover tiles. + """ + + filenames = [f for f in os.listdir(tile_dir) if f.endswith('.tif')] + + def filename_filter(filename): + latitude = int(filename.split('_')[5][1:3]) + longitude = int(filename.split('_')[5][4:7]) + if filename.split('_')[5][3] == 'W': + longitude = -longitude + mnlat = min_lat - (min_lat % WORLDCOVER_TILE_SIZE) + mnlon = min_lon - (min_lon % WORLDCOVER_TILE_SIZE) + mxlat = max_lat + (max_lat % WORLDCOVER_TILE_SIZE) + mxlon = max_lon + (max_lon % WORLDCOVER_TILE_SIZE) + in_lat_range = (latitude >= mnlat) and (latitude <= mxlat) + in_lon_range = (longitude >= mnlon) and (longitude <= mxlon) + return in_lat_range and in_lon_range + filenames_filtered = [f for f in filenames if filename_filter(f)] + + index = 0 + num_tiles = len(filenames_filtered) + for filename in filenames_filtered: + + start_time = time.time() + + tile_name = filename.split('_')[5] + filename = str(Path(tile_dir) / filename) + dst_filename = PREPROCESSED_TILE_DIR + tile_name + '.tif' + + print(f'Processing: {filename} --- {dst_filename} -- {index} of {num_tiles}') + + src_ds = gdal.Open(filename) + src_band = src_ds.GetRasterBand(1) + src_arr = src_band.ReadAsArray() + + not_water = np.logical_and(src_arr != 80, src_arr != 0) + water_arr = np.ones(src_arr.shape) + water_arr[not_water] = 0 + + driver = gdal.GetDriverByName('GTiff') + dst_ds = driver.Create( + dst_filename, + water_arr.shape[0], + water_arr.shape[1], + 1, + gdal.GDT_Byte, + options=GDAL_OPTIONS + ) + dst_ds.SetGeoTransform(src_ds.GetGeoTransform()) + dst_ds.SetProjection(src_ds.GetProjection()) + dst_band = dst_ds.GetRasterBand(1) + dst_band.WriteArray(water_arr) + dst_band.FlushCache() + + del dst_ds + del src_ds + + end_time = time.time() + total_time = end_time - start_time + + print(f'Processing {dst_filename} took {total_time} seconds.') + + index += 1 + + +def create_missing_tiles(tile_dir, lat_range, lon_range): + """Check for, and build, tiles that may be missing from WorldCover, such as over the ocean. + + Args: + lat_range: The range of latitudes to check. + lon_range: The range of longitudes to check. + Returns: + current_existing_tiles: The list of tiles that exist after the function has completed. + """ + current_existing_tiles = [f for f in os.listdir(tile_dir) if f.endswith(FILENAME_POSTFIX)] + for lon in lon_range: + for lat in lat_range: + tile = lat_lon_to_tile_string(lat, lon, is_worldcover=True) + print(f'Checking {tile}') + if tile not in current_existing_tiles: + print(f'Could not find {tile}') + + filename = PREPROCESSED_TILE_DIR + tile + x_size, y_size = 36000, 36000 + x_res, y_res = 8.333333333333333055e-05, -8.333333333333333055e-05 + ul_lon = lon + ul_lat = lat + WORLDCOVER_TILE_SIZE + geotransform = (ul_lon, x_res, 0, ul_lat, 0, y_res) + + driver = gdal.GetDriverByName('GTiff') + ds = driver.Create( + filename, + xsize=x_size, + ysize=y_size, + bands=1, + eType=gdal.GDT_Byte, + options=GDAL_OPTIONS + ) + ds.SetProjection('EPSG:4326') + ds.SetGeoTransform(geotransform) + band = ds.GetRasterBand(1) # Write ones, as tiles should only be missing over water. + band.WriteArray(np.ones((x_size, y_size))) + + del ds + del band + + current_existing_tiles.append(tile) + print(f'Added {tile}') + return current_existing_tiles + + +def get_tiles(osm_tile_coord: tuple, wc_tile_width: int, tile_width: int): + """Get a list of the worldcover tile locations necessary to fully cover an OSM tile. + + Args: + osm_tile_coord: The lower left corner coordinate (lat, lon) of the desired OSM tile. + wc_tile_width: The width/height of the Worldcover tiles in degrees. + tile_width: The width/height of the OSM tiles in degrees. + + Returns: + tiles: A list of the lower left corner coordinates of the Worldcover tiles that overlap the OSM tile. + """ + + osm_lat = osm_tile_coord[0] + osm_lon = osm_tile_coord[1] + + min_lat = osm_lat - (osm_lat % wc_tile_width) + max_lat = osm_lat + tile_width + min_lon = osm_lon - (osm_lon % wc_tile_width) + max_lon = osm_lon + tile_width + + lats = range(min_lat, max_lat, wc_tile_width) + lons = range(min_lon, max_lon, wc_tile_width) + + tiles = [] + for lat in lats: + for lon in lons: + tiles.append((lat, lon)) + + return tiles + + +def lat_lon_to_filenames(worldcover_tile_dir, osm_tile_coord: tuple, wc_tile_width: int, tile_width: int): + """Get a list of the Worldcover tile filenames that are necessary to overlap an OSM tile. + + Args: + osm_tile: The lower left corner (lat, lon) of the desired OSM tile. + wc_tile_width: The width of the Worldcover tiles in degrees. + tile_width: The width of the OSM tiles in degrees. + + Returns: + filenames: The list of Worldcover filenames. + """ + filenames = [] + tiles = get_tiles(osm_tile_coord, wc_tile_width, tile_width) + for tile in tiles: + filenames.append(worldcover_tile_dir + lat_lon_to_tile_string(tile[0], tile[1], is_worldcover=True)) + return filenames + + +def crop_tile(tile, lat, lon, tile_width, tile_height): + """Crop the merged tiles + + Args: + tile: The filename of the desired tile to crop. + """ + in_filename = UNCROPPED_TILE_DIR + tile + out_filename = CROPPED_TILE_DIR + tile + pixel_size_x, pixel_size_y = 0.00009009009, -0.00009009009 + + src_ds = gdal.Open(in_filename) + gdal.Translate( + out_filename, + src_ds, + projWin=[lon, lat+tile_height, lon+tile_width, lat], + xRes=pixel_size_x, + yRes=pixel_size_y, + outputSRS='EPSG:4326', + format='COG', + creationOptions=['NUM_THREADS=all_cpus'] + ) + remove_temp_files(['tmp_px_size.tif', 'tmp.shp']) + + +def build_dataset(worldcover_tile_dir, lat_range, lon_range, tile_width, tile_height): + """ Main function for generating a dataset with worldcover tiles. + + Args: + worldcover_tile_dir: The directory containing the unprocessed worldcover tiles. + lat_range: The range of latitudes the dataset should cover. + lon_range: The range of longitudes the dataset should cover. + out_degrees: The width of the outputed dataset tiles in degrees. + """ + for lat in lat_range: + for lon in lon_range: + start_time = time.time() + tile = lat_lon_to_tile_string(lat, lon, is_worldcover=False) + tile_filename = UNCROPPED_TILE_DIR + tile + worldcover_tiles = lat_lon_to_filenames(worldcover_tile_dir, (lat, lon), WORLDCOVER_TILE_SIZE, tile_width) + print(f'Processing: {tile_filename} {worldcover_tiles}') + merge_tiles(worldcover_tiles, tile_filename, 'GTiff', compress=True) + crop_tile(tile, lat, lon, tile_width, tile_height) + end_time = time.time() + total_time = end_time - start_time + print(f'Time Elapsed: {total_time}s') + + +def main(): + + parser = argparse.ArgumentParser( + prog='generate_worldcover_tiles.py', + description='Main script for creating a tiled watermask dataset from the ESA WorldCover dataset.' + ) + + parser.add_argument('--worldcover-tiles-dir', help='The path to the directory containing the worldcover tifs.') + parser.add_argument( + '--lat-begin', + help='The minimum latitude of the dataset in EPSG:4326.', + default=-85, + required=True + ) + parser.add_argument('--lat-end', help='The maximum latitude of the dataset in EPSG:4326.', default=85) + parser.add_argument('--lon-begin', help='The minimum longitude of the dataset in EPSG:4326.', default=-180) + parser.add_argument('--lon-end', help='The maximum longitude of the dataset in EPSG:4326.', default=180) + parser.add_argument('--tile-width', help='The desired width of the tile in degrees.', default=5) + parser.add_argument('--tile-height', help='The desired height of the tile in degrees.', default=5) + + args = parser.parse_args() + + lat_begin = int(args.lat_begin) + lat_end = int(args.lat_end) + lon_begin = int(args.lon_begin) + lon_end = int(args.lon_end) + tile_width = int(args.tile_width) + tile_height = int(args.tile_height) + lat_range = range(lat_begin, lat_end, tile_width) + lon_range = range(lon_begin, lon_end, tile_height) + + setup_directories([PREPROCESSED_TILE_DIR, UNCROPPED_TILE_DIR, CROPPED_TILE_DIR]) + + # Process the multi-class masks into water/not-water masks. + tile_preprocessing(args.worldcover_tiles_dir, lat_begin, lat_end, lon_begin, lon_end) + + wc_lat_range = range( + lat_begin - (lat_begin % WORLDCOVER_TILE_SIZE), + lat_end + (lat_end % WORLDCOVER_TILE_SIZE), + WORLDCOVER_TILE_SIZE + ) + wc_lon_range = range( + lon_begin - (lon_begin % WORLDCOVER_TILE_SIZE), + lon_end + (lon_end % WORLDCOVER_TILE_SIZE), + WORLDCOVER_TILE_SIZE + ) + + # Ocean only tiles are missing from WorldCover, so we need to create blank (water-only) ones. + create_missing_tiles(PREPROCESSED_TILE_DIR, wc_lat_range, wc_lon_range) + + build_dataset( + PREPROCESSED_TILE_DIR, + lat_range, + lon_range, + tile_width=tile_width, + tile_height=tile_height + ) + + +if __name__ == '__main__': + main() diff --git a/src/asf_tools/watermasking/utils.py b/src/asf_tools/watermasking/utils.py new file mode 100644 index 00000000..7806be0d --- /dev/null +++ b/src/asf_tools/watermasking/utils.py @@ -0,0 +1,77 @@ +import os +import subprocess + +import numpy as np + + +def lat_lon_to_tile_string(lat, lon, is_worldcover: bool = False, postfix: str = '.tif'): + """Get the name of the tile with lower left corner (lat, lon). + + Args: + lat: The minimum latitude of the tile. + lon: The minimum longitude of the tile. + is_worldcover: Wheter the tile is Worldcover or OSM. + postfix: A postfix to append to the tile name to make it a filename. + + Returns: + The name of the tile. + """ + prefixes = ['N', 'S', 'E', 'W'] if is_worldcover else ['n', 's', 'e', 'w'] + if lat >= 0: + lat_part = prefixes[0] + str(int(lat)).zfill(2) + else: + lat_part = prefixes[1] + str(int(np.abs(lat))).zfill(2) + if lon >= 0: + lon_part = prefixes[2] + str(int(lon)).zfill(3) + else: + lon_part = prefixes[3] + str(int(np.abs(lon))).zfill(3) + return lat_part + lon_part + postfix + + +def merge_tiles(tiles, out_filename, out_format, compress=False): + """Merge tiles via buildvrt and translate. + + Args: + tiles: The list of tiles to be merged. + out_format: The format of the output image. + out_filename: The name of the output COG. + """ + vrt = 'merged.vrt' + build_vrt_command = ['gdalbuildvrt', vrt] + tiles + if not compress: + translate_command = ['gdal_translate', '-of', out_format, vrt, out_filename] + else: + translate_command = [ + 'gdal_translate', + '-of', out_format, + '-co', 'COMPRESS=LZW', + '-co', 'NUM_THREADS=all_cpus', + vrt, + out_filename + ] + subprocess.run(build_vrt_command) + subprocess.run(translate_command) + remove_temp_files([vrt]) + + +def remove_temp_files(temp_files: list): + """Remove each file in a list of files. + + Args: + temp_files: The list of temporary files to remove. + """ + for file in temp_files: + try: + os.remove(file) + except FileNotFoundError: + print(f'Temp file {file} was not found, skipping removal...') + + +def setup_directories(dirs: list[str]): + """Setup the directories necessary for running the script.""" + for directory in dirs: + try: + os.mkdir(directory) + except FileExistsError: + # Directories already exists. + pass