diff --git a/configs/configs_lidro.yaml b/configs/configs_lidro.yaml index 42ed485d..4eee3a7b 100644 --- a/configs/configs_lidro.yaml +++ b/configs/configs_lidro.yaml @@ -20,10 +20,19 @@ io: raster: # size for dilatation dilation_size: 3 - + +vector: + # Filter water's area (m²) + min_water_area: 150 + # Parameters for buffer + buffer_positive: 1 + buffer_negative: -1.5 # negative buffer should be bigger than positive buffer to prevent protruding over the banks + # Tolerance from Douglas-Peucker + tolerance: 1 + filter: # Classes to be considered as "non-water" - keep_classes: [0, 1, 2, 3, 4, 5, 6, 17, 65, 66, 67] + keep_classes: [0, 1, 2, 3, 4, 5, 6, 17, 64, 65, 66, 67] # All classes hydra: output_subdir: null diff --git a/data b/data index 48ae4faf..f7ebd371 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 48ae4faf6a8a2c4233a22e927443e78a98c73d31 +Subproject commit f7ebd37180937ba4ceb9f231c0962234654c00e8 diff --git a/example_merge_mask_default.sh b/example_merge_mask_default.sh new file mode 100644 index 00000000..a2d6e65f --- /dev/null +++ b/example_merge_mask_default.sh @@ -0,0 +1,7 @@ +# For lauching Mask Hydro +python -m lidro.main_merge_mask \ +io.input_dir=./data/mask_hydro/ \ +io.output_dir=./tmp/merge_mask_hydro/ \ + + + diff --git a/lidro/main_merge_mask.py b/lidro/main_merge_mask.py new file mode 100644 index 00000000..350625de --- /dev/null +++ b/lidro/main_merge_mask.py @@ -0,0 +1,53 @@ +""" Main script for calculate Mask HYDRO 1 +""" + +import logging +import os + +import hydra +from omegaconf import DictConfig +from pyproj import CRS + +from lidro.merge_mask_hydro.vectors.merge_vector import merge_geom + + +@hydra.main(config_path="../configs/", config_name="configs_lidro.yaml", version_base="1.2") +def main(config: DictConfig): + """Merge all vector masks of hydro surfaces from the points classification of the input LAS/LAZ file, + and save it as a GeoJSON file. + + Args: + config (DictConfig): hydra configuration (configs/configs_lidro.yaml by default) + It contains the algorithm parameters and the input/output parameters + """ + logging.basicConfig(level=logging.INFO) + + # Check input/output files and folders + input_dir = config.io.input_dir + if input_dir is None: + raise ValueError("""config.io.input_dir is empty, please provide an input directory in the configuration""") + + if not os.path.isdir(input_dir): + raise FileNotFoundError(f"""The input directory ({input_dir}) doesn't exist.""") + + output_dir = config.io.output_dir + if output_dir is None: + raise ValueError("""config.io.output_dir is empty, please provide an input directory in the configuration""") + + os.makedirs(output_dir, exist_ok=True) + + if os.path.isdir(input_dir): + os.makedirs(output_dir, exist_ok=True) # Create folder "merge" + # Merge all Mash Hydro + merge_geom( + input_dir, + output_dir, + CRS.from_user_input(config.io.srid), + config.vector.min_water_area, + config.vector.buffer_positive, + config.vector.buffer_negative, + config.vector.tolerance) + + +if __name__ == "__main__": + main() diff --git a/lidro/merge_mask_hydro/vectors/check_rectify_geometry.py b/lidro/merge_mask_hydro/vectors/check_rectify_geometry.py new file mode 100644 index 00000000..12852b3a --- /dev/null +++ b/lidro/merge_mask_hydro/vectors/check_rectify_geometry.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +""" Rectify and check geometry +""" +from shapely.geometry import CAP_STYLE +import geopandas as gpd + +def apply_buffers_to_geometry(hydro_mask, buffer_positive: float, buffer_negative: float): + """Buffer geometry + Objective: create a HYDRO mask at the edge of the bank, not protruding over the banks. + In this case, negative buffer should be bigger than positive buffer + + Args: + hydro_mask (gpd.GeoDataFrame): geopandas dataframe with input geometry + buffer_positive (float): positive buffer to apply to the input geometry + buffer_negative (float): negative buffer to apply to the input geometry + Returns: + GeoJSON: updated geometry + """ + # Buffer + _geom = hydro_mask.buffer(buffer_positive, cap_style=CAP_STYLE.square) + geom = _geom.buffer(buffer_negative, cap_style=CAP_STYLE.square) + return geom + + +def fix_topology(initial_gdf: gpd.GeoDataFrame)-> gpd.GeoDataFrame: + """Fix topology + + Args: + initial_gdf (gpd.GeoDataFrame): Hydro Mask geometry + + Returns: + gpd.GeoDataFrame: Hydro Mask geometry valid + """ + # Obtain simple geometries + gdf_simple = initial_gdf.explode(ignore_index=True) + + gdf_simple = gdf_simple.drop_duplicates(ignore_index=True) + + # Identify invalid geometries, then return a GeoSeries with valid geometries + gdf_valid = gdf_simple.make_valid() + + return gdf_valid diff --git a/lidro/merge_mask_hydro/vectors/close_holes.py b/lidro/merge_mask_hydro/vectors/close_holes.py new file mode 100644 index 00000000..7b88a910 --- /dev/null +++ b/lidro/merge_mask_hydro/vectors/close_holes.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +""" Remove small holes """ +from shapely.geometry import Polygon +from shapely.ops import unary_union + + +def close_holes(polygon: Polygon, min_hole_area)-> Polygon: + """Remove small holes (surface < 100 m²) + + Args: + - polygon (Polygon): Hydro Mask geometry + - minhole_area (int): close holes in Mask Hydro : keep only holes with area bigger + than min_hole_area (> 100 m² by default) + + Returns: + Polygon: Hydro Mask geometry without holes (< 100 m²) + """ + # Interior Hydro Mask + interiors_to_keep = [ + interior for interior in polygon.interiors if Polygon(interior).area >= min_hole_area + ] + interior_holes_filter = [[pt for pt in inner.coords] for inner in interiors_to_keep] + + # Determine the appropriate Polygon to use based on the type of unified_shape + final_polygon = Polygon( + shell=polygon.exterior, holes=interior_holes_filter) + + + return final_polygon diff --git a/lidro/merge_mask_hydro/vectors/merge_vector.py b/lidro/merge_mask_hydro/vectors/merge_vector.py new file mode 100644 index 00000000..88019203 --- /dev/null +++ b/lidro/merge_mask_hydro/vectors/merge_vector.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +""" Merge +""" +import os + +import geopandas as gpd +from shapely.ops import unary_union + +from lidro.merge_mask_hydro.vectors.check_rectify_geometry import ( + apply_buffers_to_geometry, + fix_topology, +) +from lidro.merge_mask_hydro.vectors.close_holes import close_holes + + +def merge_geom( + input_folder: str, + output_folder: str, + crs: str, + min_water_area: int, + buffer_positive: float, + buffer_negative: float, + tolerance: float, +): + """Merge several masks of hydro surfaces from the points classification + of the input LAS/LAZ files from input_folder, + filter mask (keep only water's area > min_water_area) and save it as a GeoJSON file. + + Args: + input_folder (str): folder which contains several geojson geometries + output_folder (str): output folder + crs (str): a pyproj CRS object used to create the output GeoJSON file + min_water_area (int): filter Mask Hydro : keep only water objects with area bigger + than min_water_area (> 150 m² by default) + buffer_positive (int): positive buffer to apply to the mask + buffer_negative (int): negative buffer to apply to the mask + tolerance (float): All parts of a simplified geometry will be no more + than tolerance distance from the original. + """ + # Browse all files in folder + gdf = [ + gpd.read_file(os.path.join(input_folder, file), crs=crs).unary_union + for file in os.listdir(input_folder) + if file.endswith(".GeoJSON") + ] + + # Union geometry + gdf = unary_union(gdf) + gdf = gpd.GeoSeries(gdf, crs=crs).explode(index_parts=False) + + # keep only water's area (> 150 m² by default) + gdf = gdf[gdf.geometry.area > min_water_area] + + # Correct geometric errors: simplify certain shapes to make calculations easier + gdf = apply_buffers_to_geometry(gdf, buffer_positive, buffer_negative).explode(index_parts=False) + gdf = gdf.simplify(tolerance=tolerance, preserve_topology=True) + + # Check and rectify the invalid geometry + gdf = fix_topology(gdf) + + # Correction of holes (< 100m²) in Hydrological Masks + gs = gdf.geometry.apply(lambda p: close_holes(p, min_hole_area=100)) + gdf = gpd.GeoDataFrame(geometry=gs, crs=crs) + + # filter out water area < min_water_area (150 m² by default) again to make sure + # that previous geometry updates did not generate new small water areas + gdf = gdf[gdf.geometry.area > min_water_area] + + # save the result + gdf.to_file(os.path.join(output_folder, "MaskHydro_merge.geojson"), driver="GeoJSON", crs=crs) diff --git a/test/test_main_merge_mask.py b/test/test_main_merge_mask.py new file mode 100644 index 00000000..a4d8beb3 --- /dev/null +++ b/test/test_main_merge_mask.py @@ -0,0 +1,103 @@ +import os +import subprocess as sp +from pathlib import Path + +import pytest +from hydra import compose, initialize + +from lidro.main_merge_mask import main + +INPUT_DIR = Path("data") / "mask_hydro" +OUTPUT_DIR = Path("tmp") / "merge_mask_hydro/main" + + +def setup_module(module): + os.makedirs("tmp/merge_mask_hydro/main", exist_ok=True) + + +def test_main_run_okay(): + repo_dir = Path.cwd().parent + cmd = f"""python -m lidro.main_merge_mask \ + io.input_dir="{repo_dir}/lidro/data/mask_hydro/"\ + io.output_dir="{repo_dir}/lidro/tmp/merge_mask_hydro/main/" + """ + sp.run(cmd, shell=True, check=True) + + +def test_main_lidro_fail_no_input_dir(): + output_dir = OUTPUT_DIR / "main_no_input_dir" + pixel_size = 1 + min_water_area = 150 + buffer_positive = 0.5 + buffer_negative = -1.5 + tolerance = 1 + srid = 2154 + with initialize(version_base="1.2", config_path="../configs"): + # config is relative to a module + cfg = compose( + config_name="configs_lidro", + overrides=[ + f"io.output_dir={output_dir}", + f"io.pixel_size={pixel_size}", + f"vector.min_water_area={min_water_area}", + f"vector.buffer_positive={buffer_positive}", + f"vector.buffer_negative={buffer_negative}", + f"vector.tolerance={tolerance}", + f"io.srid={srid}", + ], + ) + with pytest.raises(ValueError): + main(cfg) + + +def test_main_lidro_fail_wrong_input_dir(): + output_dir = OUTPUT_DIR / "main_wrong_input_dir" + pixel_size = 1 + min_water_area = 150 + buffer_positive = 0.5 + buffer_negative = -1.5 + tolerance = 1 + srid = 2154 + with initialize(version_base="1.2", config_path="../configs"): + # config is relative to a module + cfg = compose( + config_name="configs_lidro", + overrides=[ + "io.input_dir=some_random_input_dir", + f"io.output_dir={output_dir}", + f"io.pixel_size={pixel_size}", + f"vector.min_water_area={min_water_area}", + f"vector.buffer_positive={buffer_positive}", + f"vector.buffer_negative={buffer_negative}", + f"vector.tolerance={tolerance}", + f"io.srid={srid}", + ], + ) + with pytest.raises(FileNotFoundError): + main(cfg) + + +def test_main_lidro_fail_no_output_dir(): + input_dir = INPUT_DIR + pixel_size = 1 + min_water_area = 150 + buffer_positive = 0.5 + buffer_negative = -1.5 + tolerance = 1 + srid = 2154 + with initialize(version_base="1.2", config_path="../configs"): + # config is relative to a module + cfg = compose( + config_name="configs_lidro", + overrides=[ + f"io.input_dir={input_dir}", + f"io.pixel_size={pixel_size}", + f"vector.min_water_area={min_water_area}", + f"vector.buffer_positive={buffer_positive}", + f"vector.buffer_negative={buffer_negative}", + f"vector.tolerance={tolerance}", + f"io.srid={srid}", + ], + ) + with pytest.raises(ValueError): + main(cfg) diff --git a/test/vectors/test_check_rectify_geometry.py b/test/vectors/test_check_rectify_geometry.py new file mode 100644 index 00000000..01719ffa --- /dev/null +++ b/test/vectors/test_check_rectify_geometry.py @@ -0,0 +1,48 @@ +import geopandas as gpd +from shapely.geometry import MultiPolygon + +from lidro.merge_mask_hydro.vectors.check_rectify_geometry import ( + apply_buffers_to_geometry, + fix_topology, +) + +def test_apply_buffers_to_geometry_default(): + input = "./data/merge_mask_hydro/MaskHydro_merge.geojson" + # Load each GeoJSON file as GeoDataFrame + geojson = gpd.read_file(input) + geometry = geojson["geometry"].unary_union + buffer = apply_buffers_to_geometry(geometry, 1, -1) + assert isinstance(buffer, MultiPolygon) + + +def test_fix_topology_default(): + input = "./data/merge_mask_hydro/MaskHydro_merge.geojson" + # Load each GeoJSON file as GeoDataFrame + geojson = gpd.read_file(input) + check_geom = fix_topology(geojson) + + assert isinstance(check_geom.dtypes, object) + + assert check_geom.geometry.dtype == "geometry" + + # Check not duplicates in the data + assert not check_geom.duplicated().any(), "There are duplicates in the data" + + # # Check geometry + assert check_geom.geometry.is_valid.all(), "Geometry no-valid" + +def test_fix_topology_error(): + input_error = "./data/merge_mask_hydro/MaskHydro_merge_NoValid.geojson" + # Load each GeoJSON file as GeoDataFrame + geojson = gpd.read_file(input_error) + check_geom = fix_topology(geojson) + + assert isinstance(check_geom.dtypes, object) + + assert check_geom.geometry.dtype == "geometry" + + # Check not duplicates in the data + assert not check_geom.duplicated().any(), "There are duplicates in the data" + + # # Check geometry + assert check_geom.geometry.is_valid.all(), "Geometry no-valid" diff --git a/test/vectors/test_close_holes.py b/test/vectors/test_close_holes.py new file mode 100644 index 00000000..1376cc04 --- /dev/null +++ b/test/vectors/test_close_holes.py @@ -0,0 +1,15 @@ +import geopandas as gpd +from shapely.geometry import Polygon + +from lidro.merge_mask_hydro.vectors.close_holes import close_holes + +input = "./data/merge_mask_hydro/MaskHydro_merge.geojson" + + +def test_close_holes_default(): + # Load each GeoJSON file as GeoDataFrame + geojson = gpd.read_file(input) + geometry = geojson["geometry"][1] # with contain severals holes + mask_without_hole = close_holes(geometry, 100) + + assert isinstance(mask_without_hole, Polygon) diff --git a/test/vectors/test_convert_to_vector.py b/test/vectors/test_convert_to_vector.py index 8678c381..a196b834 100644 --- a/test/vectors/test_convert_to_vector.py +++ b/test/vectors/test_convert_to_vector.py @@ -1,15 +1,17 @@ -import json import os import shutil from pathlib import Path +import geopandas as gpd +from pyproj import CRS +from shapely.geometry import Polygon + from lidro.create_mask_hydro.vectors.convert_to_vector import create_hydro_vector_mask TMP_PATH = Path("./tmp/create_mask_hydro/vectors/convert_to_vector") las_file = "./data/pointcloud/Semis_2021_0830_6291_LA93_IGN69.laz" output = "./tmp/create_mask_hydro/vectors/convert_to_vector/MaskHydro_Semis_2021_0830_6291_LA93_IGN69.GeoJSON" -output_main = "./tmp/create_mask_hydro/main/main_lidro_default/MaskHydro_Semis_2021_0830_6291_LA93_IGN69.GeoJSON" def setup_module(module): @@ -19,42 +21,21 @@ def setup_module(module): def test_create_hydro_vector_mask_default(): - crs = 'PROJCS["RGF93 v1 / Lambert-93",GEOGCS["RGF93 v1",DATUM["Reseau_Geodesique_Francais_1993_v1",\ - SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],\ - AUTHORITY["EPSG","6171"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],\ - UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4171"]],\ - PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["latitude_of_origin",46.5],\ - PARAMETER["central_meridian",3],PARAMETER["standard_parallel_1",49],PARAMETER["standard_parallel_2",44],\ - PARAMETER["false_easting",700000],PARAMETER["false_northing",6600000],\ - UNIT["metre",1,AUTHORITY["EPSG","9001"]],\ - AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","2154"]]' - - create_hydro_vector_mask(las_file, output, 1, 1000, [0, 1, 2, 3, 4, 5, 6, 17, 66], crs, 3) - + # Parameters + pixel_size = 1 + tile_size = 1000 + classes = [0, 1, 2, 3, 4, 5, 6, 17, 66] + crs = CRS.from_epsg(2154) + dilatation_size = 3 + + create_hydro_vector_mask(las_file, output, pixel_size, tile_size, classes, crs, dilatation_size) assert Path(output).exists() + gdf = gpd.read_file(output) -def test_check_structure_default(): - # Output - with open(output, "r") as f: - geojson_data = json.load(f) - - with open(output_main, "r") as f: - geojson_data_main = json.load(f) - - # CHECK STRUCTURE - assert "type" in geojson_data - assert geojson_data["type"] == "FeatureCollection" - assert "features" in geojson_data - assert isinstance(geojson_data["features"], list) - - # CHECK POLYGON - for feature in geojson_data["features"]: - geometry = feature["geometry"] - coordinates = geometry["coordinates"] - - for feature in geojson_data_main["features"]: - geometry_main = feature["geometry"] - coordinates_main = geometry_main["coordinates"] + assert not gdf.empty # GeoDataFrame shouldn't empty + assert gdf.crs.to_string() == crs # CRS is identical + assert all(isinstance(geom, Polygon) for geom in gdf.geometry) # All geometry should Polygons - assert coordinates[0] == coordinates_main[0] + expected_number_of_geometries = 2820 + assert len(gdf) == expected_number_of_geometries # the number of geometries must be identical diff --git a/test/vectors/test_merge_vector.py b/test/vectors/test_merge_vector.py new file mode 100644 index 00000000..b0599f1e --- /dev/null +++ b/test/vectors/test_merge_vector.py @@ -0,0 +1,41 @@ +import os +import shutil +from pathlib import Path + +import geopandas as gpd +from pyproj import CRS +from shapely.geometry import Polygon + +from lidro.merge_mask_hydro.vectors.merge_vector import merge_geom + +TMP_PATH = Path("./tmp/merge_mask_hydro/vectors/merge_mask_hydro") + +input_folder = "./data/mask_hydro/" +output = Path("./tmp/merge_mask_hydro/vectors/merge_mask_hydro/MaskHydro_merge.geojson") + + +def setup_module(module): + if TMP_PATH.is_dir(): + shutil.rmtree(TMP_PATH) + os.makedirs(TMP_PATH) + + +def test_create_hydro_vector_mask_default(): + # Parameters + min_water_area = 150 + buffer_positive = 0.5 + buffer_negative = -1.5 + tolerance = 1 + crs = CRS.from_epsg(2154) + + merge_geom(input_folder, TMP_PATH, crs, min_water_area, buffer_positive, buffer_negative, tolerance) + assert Path(output).exists() + + gdf = gpd.read_file(output) + + assert not gdf.empty # GeoDataFrame shouldn't empty + assert gdf.crs.to_string() == crs # CRS is identical + assert all(isinstance(geom, Polygon) for geom in gdf.geometry) # All geometry should Polygons + + expected_number_of_geometries = 3 + assert len(gdf) == expected_number_of_geometries # One geometry