From 2b58dd86fcc76bb7c03363307afed8f6673f8e8b Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:55:29 +0100 Subject: [PATCH 01/15] creating batch file and first part of implementation --- config/clipImage.yaml | 20 ++++++--- config/logReg.yaml | 45 +++++++++++++++---- scripts/calculate_raster.py | 2 +- scripts/clip_image.py | 6 +-- scripts/functions/fct_misc.py | 10 +++-- scripts/infer_ml_batch.py | 85 +++++++++++++++++++++++++++++++++++ 6 files changed, 146 insertions(+), 22 deletions(-) create mode 100644 scripts/infer_ml_batch.py diff --git a/config/clipImage.yaml b/config/clipImage.yaml index 3771410..ee21bc9 100644 --- a/config/clipImage.yaml +++ b/config/clipImage.yaml @@ -10,12 +10,22 @@ # extent_ortho_directory: 01_initial/images/rs_tlm/extent clip_image: - working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data + working_directory: D:/GitHubProjects/STDL_vegroof_production inputs: - ortho_directory: 01_initial/images/infer_moitie/tiles/ - aoi: 01_initial/images/infer_moitie/extent/extent.shp + ortho_directory: sources/rasters/GE + aoi: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg epsg: 'epsg:2056' outputs: - clip_ortho_directory: 02_intermediate/images/infer_moitie/tiles - extent_ortho_directory: 01_initial/images/infer_moitie/extent + clip_ortho_directory: ML/results/infer_moitie/tiles + extent_ortho_directory: ML/results/infer_moitie/tiles/extent + +# clip_image: +# working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data +# inputs: +# ortho_directory: 01_initial/images/infer_moitie/tiles/ +# aoi: 01_initial/images/infer_moitie/extent/extent.shp +# epsg: 'epsg:2056' +# outputs: +# clip_ortho_directory: 02_intermediate/images/infer_moitie/tiles +# extent_ortho_directory: 01_initial/images/infer_moitie/extent \ No newline at end of file diff --git a/config/logReg.yaml b/config/logReg.yaml index 8511874..93d7f88 100644 --- a/config/logReg.yaml +++ b/config/logReg.yaml @@ -3,25 +3,52 @@ hydra: dir: 02_intermediate/th/${now:%Y-%m-%d}/${now:%H-%M-%S} dev: - working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data - ortho_directory: 02_intermediate/images/infer_moitie/tiles - tile_delimitation: 02_intermediate/images/infer_moitie/extent/ - ndvi_directory: 02_intermediate/images/infer_moitie/ndvi - lum_directory: 02_intermediate/images/infer_moitie/lum - roofs_file: 02_intermediate/th/2024-08-15/09-12-47/0_500_green_roofs.shp # 02_intermediate/gt/inf_roofs.gpkg # + working_directory: D:/GitHubProjects/STDL_vegroof_production + ortho_directory: ML/results/infer_moitie/tiles + tile_delimitation: ML/results/infer_moitie/extent + ndvi_directory: ML/results/infer_moitie/ndvi + lum_directory: ML/results/infer_moitie/lum + roofs_file: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg roofs_layer: gt: False green_tag: 'veg_new_3' green_cls: 'class_3' - chm_layer: 02_intermediate/autres/CHM_AOI_inf.gpkg - results_directory: 03_results/infer_moitie/ + chm_layer: sources/CHM/GE/SIPV_ICA_MNC_2019.shp + results_directory: ML/results egid_train_test: egid_train_test_gt.csv th_ndvi: 0 # no thresholding -1 th_lum: 500 # no thresholding 765 or 210000 cls_ml: 'binary' # 'binary' 'multi' 'multi_aggreg' model_ml: 'LR' # 'LR' 'RF' - trained_model_dir: 03_results/scratch_gt/ + trained_model_dir: ML/models epsg: 'epsg:2056' +# hydra: +# run: +# dir: 02_intermediate/th/${now:%Y-%m-%d}/${now:%H-%M-%S} + +# dev: +# working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data +# ortho_directory: 
02_intermediate/images/infer_moitie/tiles +# tile_delimitation: 02_intermediate/images/infer_moitie/extent/ +# ndvi_directory: 02_intermediate/images/infer_moitie/ndvi +# lum_directory: 02_intermediate/images/infer_moitie/lum +# roofs_file: 02_intermediate/th/2024-08-15/09-12-47/0_500_green_roofs.shp # 02_intermediate/gt/inf_roofs.gpkg # +# roofs_layer: +# gt: False +# green_tag: 'veg_new_3' +# green_cls: 'class_3' +# chm_layer: 02_intermediate/autres/CHM_AOI_inf.gpkg +# results_directory: 03_results/infer_moitie/ +# egid_train_test: egid_train_test_gt.csv +# th_ndvi: 0 # no thresholding -1 +# th_lum: 500 # no thresholding 765 or 210000 +# cls_ml: 'binary' # 'binary' 'multi' 'multi_aggreg' +# model_ml: 'LR' # 'LR' 'RF' +# trained_model_dir: 03_results/scratch_gt/ +# epsg: 'epsg:2056' + + + diff --git a/scripts/calculate_raster.py b/scripts/calculate_raster.py index c0cf8a9..eeb959c 100644 --- a/scripts/calculate_raster.py +++ b/scripts/calculate_raster.py @@ -108,7 +108,7 @@ def calculate_lum(tile, band_nbr_red=1, band_nbr_green=2, band_nbr_blue=3, path= tile_list=[] tile_list.extend(tile_list_ortho) - for tile in tqdm(tile_list, 'Processing tiles'): + for _, tile in tqdm(enumerate(tile_list), total=len(tile_list), desc='Processing tiles'): tile = tile.replace("\\","/") #handle windows path ndvi_tile_path=os.path.join(NDVI_DIR, tile.split('/')[-1].replace('.tif', '_NDVI.tif')) _ = calculate_ndvi(tile, path=ndvi_tile_path) diff --git a/scripts/clip_image.py b/scripts/clip_image.py index a9d969f..50f59b2 100644 --- a/scripts/clip_image.py +++ b/scripts/clip_image.py @@ -2,7 +2,7 @@ import yaml import argparse from loguru import logger -import tqdm as tqdm +from tqdm import tqdm import geopandas as gpd @@ -56,7 +56,7 @@ # keep only the geometry column aoi = aoi.filter(['geometry']) # buffer every geometry by 50 units - for index, row in aoi.iterrows(): + for index, row in tqdm(aoi.iterrows(), total=len(aoi), desc="Buffering geometries"): row = row.copy() aoi.loc[index, 'geometry'] = row.geometry.buffer(50,join_style=2) @@ -64,7 +64,7 @@ aoi_clipped=aoi_clipped.reset_index(drop=True) i=1 - for idx,row in aoi_clipped.iterrows(): + for idx,row in tqdm(aoi_clipped.iterrows(), total=len(aoi_clipped), desc="Clipping rasters"): fct_misc.clip_im(ORTHO_DIR, aoi_clipped.iloc[[idx]], OUTPUT_DIR, i, EPSG) i=i+1 logger.success(f'Successfully clipped {i-1} images.') \ No newline at end of file diff --git a/scripts/functions/fct_misc.py b/scripts/functions/fct_misc.py index 82e3441..4835d95 100644 --- a/scripts/functions/fct_misc.py +++ b/scripts/functions/fct_misc.py @@ -16,6 +16,8 @@ import csv import warnings + +from tqdm import tqdm warnings.filterwarnings('ignore') def format_logger(logger): @@ -152,10 +154,10 @@ def generate_extent(PATH_IN: str, PATH_OUT: str, EPSG: str = 'epsg:2056'): list_name.append(name) ext_merge=gpd.GeoDataFrame() - for _name in list_name: + for _, _name in tqdm(enumerate(list_name), total=len(list_name), desc="Computing extent"): _tif = os.path.join(PATH_IN, _name) - logger.info(f'Computing extent of {str(_name)} ...') + # logger.info(f'Computing extent of {str(_name)} ...') with rasterio.open(_tif) as src: gdf = gpd.GeoDataFrame.from_features( @@ -193,7 +195,7 @@ def clip_im(TIFF_FOLDER: str, GPD: str, OUT_FOLDER: str, idx: int, EPSG: str = ' with rasterio.open(os.path.join(TIFF_FOLDER, GPD.iloc[-1]['NAME']+'.tif')) as src: - logger.info('Clipping ' + GPD.iloc[-1]['NAME'] + '.tif...') + # logger.info('Clipping ' + GPD.iloc[-1]['NAME'] + '.tif...') out_image, out_transform = 
rasterio.mask.mask( src, @@ -225,4 +227,4 @@ def clip_im(TIFF_FOLDER: str, GPD: str, OUT_FOLDER: str, idx: int, EPSG: str = ' with rasterio.open(out_path, 'w', **out_meta) as dst: dst.write(out_image) - logger.info('Clipped image ' + GPD.iloc[-1]['NAME']+'_'+str(idx) + ' written...') + # logger.info('Clipped image ' + GPD.iloc[-1]['NAME']+'_'+str(idx) + ' written...') diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py new file mode 100644 index 0000000..106db39 --- /dev/null +++ b/scripts/infer_ml_batch.py @@ -0,0 +1,85 @@ +import os +import numpy as np +import pandas as pd +import geopandas as gpd +import shutil +import yaml +import argparse +import subprocess +import tempfile + + +BATCH_SIZE = 100 + +def infer_ml_batch(cfg_clipImage, cfg_logRes): + WORKING_DIR = cfg_clipImage['clip_image']['working_directory'] + AOI = gpd.read_file(os.path.join(WORKING_DIR,cfg_clipImage['clip_image']['inputs']['aoi'])) + num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1 + + # Create temp folder + temp_storage = tempfile.mkdtemp() + + # Create temp config files + cfg_clipImage['clip_image']['inputs']['aoi'] = os.path.join(temp_storage, 'sub_AOI.gpgk') + temp_cfg_clipImage = os.path.join(temp_storage, "clipImage.yaml") + with open(temp_cfg_clipImage, 'w') as outfile: + yaml.dump(cfg_clipImage, outfile) + + + for batch in range(num_batchs): + print(f"Processing batch {batch+1} / {num_batchs}") + + sub_AOI = AOI.iloc[BATCH_SIZE * batch:min(BATCH_SIZE * (batch + 1), len(AOI) - 1)] + sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpgk'), driver="GPKG") + + cfg_logRes['dev']['roofs_file'] = os.path.join(temp_storage, 'sub_AOI.gpgk') + temp_cfg_logReg = os.path.join(temp_storage, "logRes.yaml") + with open(temp_cfg_logReg, 'w') as outfile: + yaml.dump(cfg_logRes, outfile) + + # Clipping images + # subprocess.run(["./.venv/Scripts/python", "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) + + # # Computing rasters + # subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg]) + + # # Greenery + # subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg]) + + # Change result folder + cfg_logRes['dev']['results_directory'] = cfg_logRes['dev']['results_directory'] + f"/results_batch{batch}" + temp_cfg_logReg = os.path.join(temp_storage, "logRes.yaml") + with open(temp_cfg_logReg, 'w') as outfile: + yaml.dump(cfg_logRes, outfile) + + # Compute stats + # subprocess.run(["./.venv/Scripts/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg]) + + # Do inference + subprocess.run(["./.venv/Scripts/python", "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg]) + + # print(result.stdout) + quit() + os.remove(os.path.join(temp_storage, 'sub_AOI.gpgk')) + os.remove(temp_cfg) + shutil.rmtree(cfg_clipImage['clip_image']['outputs']['clip_ortho_directory']) + shutil.rmtree(cfg_clipImage['clip_image']['outputs']['extent_ortho_directory']) + + os.remove(temp_storage) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="This script computes NDVI and luminosity from NRGB rasters.") + parser.add_argument('-cfg', '--config_file', type=str, + help='Framework configuration file', + default="config/logReg.yaml") + args = parser.parse_args() + + # load input parameters + with open(args.config_file) as fp: + cfg_logRes = yaml.load(fp, Loader=yaml.FullLoader) + with open("config/clipImage.yaml") as fp: + cfg_clipImage = yaml.load(fp, Loader=yaml.FullLoader) + + infer_ml_batch(cfg_clipImage, cfg_logRes) From 
5a8943007b0ad1c8c9029c1c5e387c435cf82b6c Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Tue, 7 Jan 2025 09:32:02 +0100 Subject: [PATCH 02/15] finishing batch infering --- scripts/infer_ml_batch.py | 63 ++++++++++++++++++++++++++------------- setup/requirements.txt | 2 +- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index 106db39..fd1ce7e 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -7,9 +7,9 @@ import argparse import subprocess import tempfile +from time import time - -BATCH_SIZE = 100 +BATCH_SIZE = 200 def infer_ml_batch(cfg_clipImage, cfg_logRes): WORKING_DIR = cfg_clipImage['clip_image']['working_directory'] @@ -25,11 +25,12 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): with open(temp_cfg_clipImage, 'w') as outfile: yaml.dump(cfg_clipImage, outfile) - + temp_result_folders = [] for batch in range(num_batchs): + start_time = time() print(f"Processing batch {batch+1} / {num_batchs}") - sub_AOI = AOI.iloc[BATCH_SIZE * batch:min(BATCH_SIZE * (batch + 1), len(AOI) - 1)] + sub_AOI = AOI.iloc[BATCH_SIZE * batch: min(BATCH_SIZE * (batch + 1), len(AOI) - 1)] sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpgk'), driver="GPKG") cfg_logRes['dev']['roofs_file'] = os.path.join(temp_storage, 'sub_AOI.gpgk') @@ -38,46 +39,68 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): yaml.dump(cfg_logRes, outfile) # Clipping images - # subprocess.run(["./.venv/Scripts/python", "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) + start_time_2 = time() + print(f"Time for loading initial stuff: {round((start_time_2 - start_time)/60, 2)}min") + subprocess.run(["./.venv/Scripts/python", "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) + start_time_3 = time() + print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") # # Computing rasters - # subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg]) + start_time_4 = time() + print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") # # Greenery - # subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg]) + start_time_5 = time() + print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") # Change result folder - cfg_logRes['dev']['results_directory'] = cfg_logRes['dev']['results_directory'] + f"/results_batch{batch}" + temp_res_fold = cfg_logRes['dev']['results_directory'] + f"/results_batch{batch}" + temp_result_folders.append(temp_res_fold) + cfg_logRes['dev']['results_directory'] = temp_res_fold temp_cfg_logReg = os.path.join(temp_storage, "logRes.yaml") with open(temp_cfg_logReg, 'w') as outfile: yaml.dump(cfg_logRes, outfile) # Compute stats - # subprocess.run(["./.venv/Scripts/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg]) + start_time_6 = time() + print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min") # Do inference subprocess.run(["./.venv/Scripts/python", "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg]) + start_time_7 = time() + print(f"Time for inference script: {round((start_time_7 - 
start_time_6)/60, 2)}min") # print(result.stdout) - quit() os.remove(os.path.join(temp_storage, 'sub_AOI.gpgk')) - os.remove(temp_cfg) - shutil.rmtree(cfg_clipImage['clip_image']['outputs']['clip_ortho_directory']) - shutil.rmtree(cfg_clipImage['clip_image']['outputs']['extent_ortho_directory']) + os.remove(temp_cfg_logReg) + shutil.rmtree(os.path.join(cfg_clipImage['clip_image']['working_directory'], cfg_clipImage['clip_image']['outputs']['clip_ortho_directory'])) + print(f"Time for batch: {round((time() - start_time))/60, 2}min") + if batch == 2: + break + # Merge results + df_results = gpd.GeoDataFrame() + for res_dir in temp_result_folders: + df_sub_res = gpd.read_file(os.path.join(res_dir, 'sub_AOI.gpkg')) + df_results = df_sub_res if len(df_results) == 0 else gpd.GeoDataFrame(pd.concat([df_results, df_sub_res], ignore_index=True)) + + df_results.to_file(os.path.join(cfg_logRes['dev']['working_directory'], cfg_logRes['dev']['results_directory'], 'results.gpkg'), driver="GPKG") os.remove(temp_storage) if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="This script computes NDVI and luminosity from NRGB rasters.") - parser.add_argument('-cfg', '--config_file', type=str, - help='Framework configuration file', - default="config/logReg.yaml") - args = parser.parse_args() + # parser = argparse.ArgumentParser( + # description="This script computes NDVI and luminosity from NRGB rasters.") + # parser.add_argument('-cfg', '--config_file', type=str, + # help='Framework configuration file', + # default="config/logReg.yaml") + # args = parser.parse_args() # load input parameters - with open(args.config_file) as fp: + with open("config/logReg.yaml") as fp: cfg_logRes = yaml.load(fp, Loader=yaml.FullLoader) with open("config/clipImage.yaml") as fp: cfg_clipImage = yaml.load(fp, Loader=yaml.FullLoader) diff --git a/setup/requirements.txt b/setup/requirements.txt index 20463a8..46f6d44 100644 --- a/setup/requirements.txt +++ b/setup/requirements.txt @@ -95,7 +95,7 @@ rasterio==1.3.9 # rasterstats rasterstats==0.19.0 # via -r setup/requirements.in -scikit-learn==1.3.2 +scikit-learn==1.5.2 # via -r setup/requirements.in scipy==1.10.1 # via scikit-learn From 302a1688a1677da8f52a8b1b39038605f2e8699d Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:41:37 +0100 Subject: [PATCH 03/15] batching script done --- config/clipImage.yaml | 2 +- config/logReg.yaml | 2 +- scripts/clip_image.py | 4 +- scripts/infer_ml_batch.py | 92 +++++++++++++++++++++++++-------------- 4 files changed, 64 insertions(+), 36 deletions(-) diff --git a/config/clipImage.yaml b/config/clipImage.yaml index ee21bc9..a316d1c 100644 --- a/config/clipImage.yaml +++ b/config/clipImage.yaml @@ -17,7 +17,7 @@ clip_image: epsg: 'epsg:2056' outputs: clip_ortho_directory: ML/results/infer_moitie/tiles - extent_ortho_directory: ML/results/infer_moitie/tiles/extent + extent_ortho_directory: ML/results/infer_moitie/extent # clip_image: diff --git a/config/logReg.yaml b/config/logReg.yaml index 93d7f88..976ee6e 100644 --- a/config/logReg.yaml +++ b/config/logReg.yaml @@ -5,7 +5,7 @@ hydra: dev: working_directory: D:/GitHubProjects/STDL_vegroof_production ortho_directory: ML/results/infer_moitie/tiles - tile_delimitation: ML/results/infer_moitie/extent + tile_delimitation: ML/results/infer_moitie/tiles/extent ndvi_directory: ML/results/infer_moitie/ndvi lum_directory: ML/results/infer_moitie/lum roofs_file: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg diff 
--git a/scripts/clip_image.py b/scripts/clip_image.py index 50f59b2..dc4aaef 100644 --- a/scripts/clip_image.py +++ b/scripts/clip_image.py @@ -46,8 +46,8 @@ os.chdir(WORKING_DIR) fct_misc.ensure_dir_exists(OUTPUT_DIR) - - fct_misc.generate_extent(ORTHO_DIR, TILE_DELIMITATION, EPSG) + if not os.path.isfile(os.path.join(TILE_DELIMITATION,'extent.shp')): + fct_misc.generate_extent(ORTHO_DIR, TILE_DELIMITATION, EPSG) tiles=gpd.read_file(TILE_DELIMITATION) logger.info('Reading AOI geometries...') diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index fd1ce7e..57b4c5e 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -1,18 +1,24 @@ import os +import sys import numpy as np import pandas as pd import geopandas as gpd import shutil import yaml import argparse +from time import time import subprocess import tempfile -from time import time +sys.path.insert(1, 'scripts') +import functions.fct_misc as fct_misc +from copy import deepcopy -BATCH_SIZE = 200 +BATCH_SIZE = 1000 -def infer_ml_batch(cfg_clipImage, cfg_logRes): +def infer_ml_batch(cfg_clipImage, cfg_logReg): WORKING_DIR = cfg_clipImage['clip_image']['working_directory'] + CLS_ML = cfg_logReg['dev']['cls_ml'] + MODEL_ML = cfg_logReg['dev']['model_ml'] AOI = gpd.read_file(os.path.join(WORKING_DIR,cfg_clipImage['clip_image']['inputs']['aoi'])) num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1 @@ -25,6 +31,24 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): with open(temp_cfg_clipImage, 'w') as outfile: yaml.dump(cfg_clipImage, outfile) + # Compute extents + OUTPUTS=cfg_clipImage['clip_image']['outputs'] + OUTPUT_DIR=OUTPUTS['clip_ortho_directory'] + + # os.chdir(WORKING_DIR) + fct_misc.ensure_dir_exists(OUTPUT_DIR) + + ORTHO_DIR=cfg_clipImage['clip_image']['inputs']['ortho_directory'] + TILE_DELIMITATION=cfg_clipImage['clip_image']['outputs']['extent_ortho_directory'] + EPSG=cfg_clipImage['clip_image']['inputs']['epsg'] + if not os.path.isfile(os.path.join(WORKING_DIR, TILE_DELIMITATION,'extent.shp')): + fct_misc.generate_extent( + os.path.join(WORKING_DIR, ORTHO_DIR), + os.path.join(WORKING_DIR, TILE_DELIMITATION), + EPSG, + ) + + # Start batching temp_result_folders = [] for batch in range(num_batchs): start_time = time() @@ -33,10 +57,12 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): sub_AOI = AOI.iloc[BATCH_SIZE * batch: min(BATCH_SIZE * (batch + 1), len(AOI) - 1)] sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpgk'), driver="GPKG") - cfg_logRes['dev']['roofs_file'] = os.path.join(temp_storage, 'sub_AOI.gpgk') - temp_cfg_logReg = os.path.join(temp_storage, "logRes.yaml") - with open(temp_cfg_logReg, 'w') as outfile: - yaml.dump(cfg_logRes, outfile) + temp_cfg_logReg = deepcopy(cfg_logReg) + temp_cfg_logReg['dev']['roofs_file'] = 'derp' + temp_cfg_logReg['dev']['roofs_file'] = os.path.join(temp_storage, 'sub_AOI.gpgk') + temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml") + with open(temp_cfg_logReg_dir, 'w') as outfile: + yaml.dump(temp_cfg_logReg, outfile) # Clipping images start_time_2 = time() @@ -46,63 +72,65 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") # # Computing rasters - subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) start_time_4 = time() print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 
2)}min") + # Change result folder + temp_res_fold = cfg_logReg['dev']['results_directory'] + f"/results_batch{batch}/" + temp_result_folders.append(temp_res_fold) + temp_cfg_logReg['dev']['results_directory'] = temp_res_fold + with open(temp_cfg_logReg_dir, 'w') as outfile: + yaml.dump(temp_cfg_logReg, outfile) + # # Greenery - subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir]) start_time_5 = time() print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") - # Change result folder - temp_res_fold = cfg_logRes['dev']['results_directory'] + f"/results_batch{batch}" - temp_result_folders.append(temp_res_fold) - cfg_logRes['dev']['results_directory'] = temp_res_fold - temp_cfg_logReg = os.path.join(temp_storage, "logRes.yaml") - with open(temp_cfg_logReg, 'w') as outfile: - yaml.dump(cfg_logRes, outfile) # Compute stats - subprocess.run(["./.venv/Scripts/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir]) start_time_6 = time() print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min") # Do inference - subprocess.run(["./.venv/Scripts/python", "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/Scripts/python", "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg_dir]) start_time_7 = time() print(f"Time for inference script: {round((start_time_7 - start_time_6)/60, 2)}min") # print(result.stdout) os.remove(os.path.join(temp_storage, 'sub_AOI.gpgk')) - os.remove(temp_cfg_logReg) - shutil.rmtree(os.path.join(cfg_clipImage['clip_image']['working_directory'], cfg_clipImage['clip_image']['outputs']['clip_ortho_directory'])) - print(f"Time for batch: {round((time() - start_time))/60, 2}min") + os.remove(temp_cfg_logReg_dir) + shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory'])) + # for file in os.listdir(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory'])): + # file_path = os.path.join(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory']), file) + # # Check if it's a file + # if os.path.isfile(file_path): + # os.remove(file_path) + shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ndvi_directory'])) + shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['lum_directory'])) + print(f"Time for batch: {round((time() - start_time)/60, 2)}min") if batch == 2: break # Merge results + print("="*10 + "\nMERGING RESULTS...") df_results = gpd.GeoDataFrame() for res_dir in temp_result_folders: - df_sub_res = gpd.read_file(os.path.join(res_dir, 'sub_AOI.gpkg')) + df_sub_res = gpd.read_file(os.path.join(WORKING_DIR, res_dir, 'inf_' + CLS_ML + '_' + MODEL_ML + '.gpkg')) df_results = df_sub_res if len(df_results) == 0 else gpd.GeoDataFrame(pd.concat([df_results, df_sub_res], ignore_index=True)) - df_results.to_file(os.path.join(cfg_logRes['dev']['working_directory'], cfg_logRes['dev']['results_directory'], 'results.gpkg'), driver="GPKG") + df_results.to_file(os.path.join(WORKING_DIR, cfg_logReg['dev']['results_directory'], 'results.gpkg'), driver="GPKG") os.remove(temp_storage) + print("MERGING COMPLETED!") if __name__ == '__main__': - # parser = argparse.ArgumentParser( - # description="This script computes NDVI and luminosity from NRGB rasters.") - # parser.add_argument('-cfg', '--config_file', type=str, - # help='Framework 
configuration file', - # default="config/logReg.yaml") - # args = parser.parse_args() - # load input parameters with open("config/logReg.yaml") as fp: - cfg_logRes = yaml.load(fp, Loader=yaml.FullLoader) + cfg_logReg = yaml.load(fp, Loader=yaml.FullLoader) with open("config/clipImage.yaml") as fp: cfg_clipImage = yaml.load(fp, Loader=yaml.FullLoader) - infer_ml_batch(cfg_clipImage, cfg_logRes) + infer_ml_batch(cfg_clipImage, cfg_logReg) From 0b44dd3cc0f71ba48ea9b77bab99ca7574be4385 Mon Sep 17 00:00:00 2001 From: destoswa Date: Wed, 8 Jan 2025 16:44:23 +0000 Subject: [PATCH 04/15] vm cfg --- config/clipImage.yaml | 4 ++-- config/logReg.yaml | 4 ++-- scripts/infer_ml_batch.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/config/clipImage.yaml b/config/clipImage.yaml index ee21bc9..03b7426 100644 --- a/config/clipImage.yaml +++ b/config/clipImage.yaml @@ -10,14 +10,14 @@ # extent_ortho_directory: 01_initial/images/rs_tlm/extent clip_image: - working_directory: D:/GitHubProjects/STDL_vegroof_production + working_directory: /mnt/data-volume-02/destouch/proj-vegroofs/data inputs: ortho_directory: sources/rasters/GE aoi: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg epsg: 'epsg:2056' outputs: clip_ortho_directory: ML/results/infer_moitie/tiles - extent_ortho_directory: ML/results/infer_moitie/tiles/extent + extent_ortho_directory: ML/results/infer_moitie/extent # clip_image: diff --git a/config/logReg.yaml b/config/logReg.yaml index 93d7f88..60132d4 100644 --- a/config/logReg.yaml +++ b/config/logReg.yaml @@ -3,9 +3,9 @@ hydra: dir: 02_intermediate/th/${now:%Y-%m-%d}/${now:%H-%M-%S} dev: - working_directory: D:/GitHubProjects/STDL_vegroof_production + working_directory: /mnt/data-volume-02/destouch/proj-vegroofs/data ortho_directory: ML/results/infer_moitie/tiles - tile_delimitation: ML/results/infer_moitie/extent + tile_delimitation: ML/results/infer_moitie/tiles/extent ndvi_directory: ML/results/infer_moitie/ndvi lum_directory: ML/results/infer_moitie/lum roofs_file: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index fd1ce7e..a0b9350 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -41,17 +41,17 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): # Clipping images start_time_2 = time() print(f"Time for loading initial stuff: {round((start_time_2 - start_time)/60, 2)}min") - subprocess.run(["./.venv/Scripts/python", "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) + subprocess.run(["./.venv/bin/python", "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) start_time_3 = time() print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") # # Computing rasters - subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/bin/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg]) start_time_4 = time() print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") # # Greenery - subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/bin/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg]) start_time_5 = time() print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") @@ -64,12 +64,12 @@ def infer_ml_batch(cfg_clipImage, cfg_logRes): yaml.dump(cfg_logRes, outfile) # Compute stats - subprocess.run(["./.venv/Scripts/python", 
"./scripts/roof_stats.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/bin/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg]) start_time_6 = time() print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min") # Do inference - subprocess.run(["./.venv/Scripts/python", "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg]) + subprocess.run(["./.venv/bin/python", "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg]) start_time_7 = time() print(f"Time for inference script: {round((start_time_7 - start_time_6)/60, 2)}min") From c9a09fa99804fdf864d5332d9754615f82c053cb Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Sat, 11 Jan 2025 16:52:55 +0100 Subject: [PATCH 05/15] small correction --- scripts/infer_ml_batch.py | 57 +++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index 57b4c5e..8866777 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -5,6 +5,7 @@ import geopandas as gpd import shutil import yaml +from tqdm import tqdm import argparse from time import time import subprocess @@ -12,8 +13,9 @@ sys.path.insert(1, 'scripts') import functions.fct_misc as fct_misc from copy import deepcopy +import platform -BATCH_SIZE = 1000 +BATCH_SIZE = 5000 def infer_ml_batch(cfg_clipImage, cfg_logReg): WORKING_DIR = cfg_clipImage['clip_image']['working_directory'] @@ -48,9 +50,17 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): EPSG, ) + # Platform interpretor + interpretor_path = "" + if platform.system() == 'Windows': + interpretor_path = "./.venv/Scripts/python" + else: + interpretor_path = "./.venv/bin/python" + # Start batching temp_result_folders = [] for batch in range(num_batchs): + batch = 1 start_time = time() print(f"Processing batch {batch+1} / {num_batchs}") @@ -67,12 +77,12 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # Clipping images start_time_2 = time() print(f"Time for loading initial stuff: {round((start_time_2 - start_time)/60, 2)}min") - subprocess.run(["./.venv/Scripts/python", "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) + subprocess.run([interpretor_path, "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage]) start_time_3 = time() print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") # # Computing rasters - subprocess.run(["./.venv/Scripts/python", "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) + subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) start_time_4 = time() print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") @@ -84,34 +94,28 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): yaml.dump(temp_cfg_logReg, outfile) # # Greenery - subprocess.run(["./.venv/Scripts/python", "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir]) + subprocess.run([interpretor_path, "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir]) start_time_5 = time() print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") # Compute stats - subprocess.run(["./.venv/Scripts/python", "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir]) + subprocess.run([interpretor_path, "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir]) start_time_6 = time() print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min") # Do inference - subprocess.run(["./.venv/Scripts/python", "./scripts/infer_ml.py", "-cfg", 
temp_cfg_logReg_dir]) + subprocess.run([interpretor_path, "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg_dir]) start_time_7 = time() print(f"Time for inference script: {round((start_time_7 - start_time_6)/60, 2)}min") - # print(result.stdout) os.remove(os.path.join(temp_storage, 'sub_AOI.gpgk')) os.remove(temp_cfg_logReg_dir) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory'])) - # for file in os.listdir(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory'])): - # file_path = os.path.join(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory']), file) - # # Check if it's a file - # if os.path.isfile(file_path): - # os.remove(file_path) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ndvi_directory'])) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['lum_directory'])) print(f"Time for batch: {round((time() - start_time)/60, 2)}min") - if batch == 2: + if batch == 1: break # Merge results @@ -122,11 +126,36 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): df_results = df_sub_res if len(df_results) == 0 else gpd.GeoDataFrame(pd.concat([df_results, df_sub_res], ignore_index=True)) df_results.to_file(os.path.join(WORKING_DIR, cfg_logReg['dev']['results_directory'], 'results.gpkg'), driver="GPKG") - os.remove(temp_storage) + shutil.rmtree(temp_storage) print("MERGING COMPLETED!") if __name__ == '__main__': + # # load input parameters + # with open("config/logReg.yaml") as fp: + # cfg_logReg = yaml.load(fp, Loader=yaml.FullLoader) + + # CLS_ML = cfg_logReg['dev']['cls_ml'] + # MODEL_ML = cfg_logReg['dev']['model_ml'] + + # WORKING_DIR = cfg_logReg['dev']['working_directory'] + # temp_result_folders = [ + # 'ML/results/results_batch0', + # 'ML/results/results_batch1', + # 'ML/results/results_batch2', + # ] + # WORKING_DIR = r"D:\GitHubProjects\STDL_vegroof_production" + # temp_result_folders = [f'ML/results_GE/results_batch{x}' for x in range(16)] + # # Merge results + # df_results = gpd.GeoDataFrame() + # for _, res_dir in tqdm(enumerate(temp_result_folders), total=len(temp_result_folders), desc='Merging results'): + # df_sub_res = gpd.read_file(os.path.join(WORKING_DIR, res_dir, "inf_binary_LR.gpkg")) + # df_results = df_sub_res if len(df_results) == 0 else gpd.GeoDataFrame(pd.concat([df_results, df_sub_res], ignore_index=True)) + + # df_results.to_file(os.path.join(WORKING_DIR, "ML/results_GE", 'results.gpkg'), driver="GPKG", index=False) + # quit() + + # load input parameters with open("config/logReg.yaml") as fp: cfg_logReg = yaml.load(fp, Loader=yaml.FullLoader) From 62f885b56ca3dc764ee6a14719faa2362c8da199 Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:56:33 +0100 Subject: [PATCH 06/15] version with Dask in-progress part commented --- config/clipImage.yaml | 6 +++--- config/logReg.yaml | 6 +++--- scripts/greenery.py | 32 ++++++++++++++++++++++++++------ scripts/infer_ml_batch.py | 5 +---- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/config/clipImage.yaml b/config/clipImage.yaml index 03b7426..4070637 100644 --- a/config/clipImage.yaml +++ b/config/clipImage.yaml @@ -10,10 +10,10 @@ # extent_ortho_directory: 01_initial/images/rs_tlm/extent clip_image: - working_directory: /mnt/data-volume-02/destouch/proj-vegroofs/data + working_directory: D:/GitHubProjects/STDL_vegroof_production inputs: - ortho_directory: sources/rasters/GE - aoi: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg + ortho_directory: sources/rasters/ZH + aoi: 
sources/Footprint/ZH/00-AVBodenbedGeb_updated.gpkg epsg: 'epsg:2056' outputs: clip_ortho_directory: ML/results/infer_moitie/tiles diff --git a/config/logReg.yaml b/config/logReg.yaml index 60132d4..cb2cf15 100644 --- a/config/logReg.yaml +++ b/config/logReg.yaml @@ -3,17 +3,17 @@ hydra: dir: 02_intermediate/th/${now:%Y-%m-%d}/${now:%H-%M-%S} dev: - working_directory: /mnt/data-volume-02/destouch/proj-vegroofs/data + working_directory: D:/GitHubProjects/STDL_vegroof_production ortho_directory: ML/results/infer_moitie/tiles tile_delimitation: ML/results/infer_moitie/tiles/extent ndvi_directory: ML/results/infer_moitie/ndvi lum_directory: ML/results/infer_moitie/lum - roofs_file: sources/Footprint/GE/CAD_BATIMENT_HORSOL_updated.gpkg + roofs_file: sources/Footprint/ZH/00-AVBodenbedGeb_updated.gpkg roofs_layer: gt: False green_tag: 'veg_new_3' green_cls: 'class_3' - chm_layer: sources/CHM/GE/SIPV_ICA_MNC_2019.shp + chm_layer: sources/CHM/ZH/chm_ZH_total.shp results_directory: ML/results egid_train_test: egid_train_test_gt.csv th_ndvi: 0 # no thresholding -1 diff --git a/scripts/greenery.py b/scripts/greenery.py index 34116f8..e1bf192 100644 --- a/scripts/greenery.py +++ b/scripts/greenery.py @@ -10,6 +10,8 @@ import pandas as pd import geopandas as gpd +import dask_geopandas as dg +from time import time import fiona import rasterio from rasterio.features import shapes @@ -138,12 +140,30 @@ def do_greenery(tile, shapes_roof, roofs): green_roofs_egid.index.names = ['Index'] logger.info('Filtering for overhanging vegetation...') - - CHM = os.path.join(WORKING_DIR, CHM_LAYER) - CHM_GPD=gpd.read_file(CHM) - CHM_GPD['geometry'] = CHM_GPD.buffer(1) - green_roofs_egid=gpd.overlay(green_roofs_egid, CHM_GPD, how='difference') - green_roofs_egid['area_green'] = green_roofs_egid.area + try: + CHM = os.path.join(WORKING_DIR, CHM_LAYER) + print('starting to load CHM') + time_start = time() + CHM_GPD = gpd.read_file(CHM) + # CHM_GPD = dg.read_file(CHM, chunksize=100000) + # CHM_GPD = CHM_GPD.compute() + # CHM_GPD = CHM_GPD.calculate_spatial_partitions() + # small_bounds = green_roofs_egid.total_bounds + # CHM_GPD = CHM_GPD.cx[ + # small_bounds[0]:small_bounds[2], small_bounds[1]:small_bounds[3] + # ] + # CHM_GPD = CHM_GPD.compute() + # green_roofs_egid = CHM_GPD.overlay(green_roofs_egid, how='difference') + CHM_GPD['geometry'] = CHM_GPD.buffer(1) + green_roofs_egid['area_green'] = green_roofs_egid.area + print(f'finished to load CHM in {time() - time_start}sec') + + print('starting overlay') + time_start = time() + green_roofs_egid=gpd.overlay(green_roofs_egid, CHM_GPD, how='difference') + print(f'finished to overlay in {time() - time_start}sec') + except Exception as e: + logger.info(f"Error happened during overhanging veg filtering: {e}") logger.info('Join greenery on the roofs and vice-versa, saving...') diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index 8866777..932fd71 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -15,7 +15,7 @@ from copy import deepcopy import platform -BATCH_SIZE = 5000 +BATCH_SIZE = 10 def infer_ml_batch(cfg_clipImage, cfg_logReg): WORKING_DIR = cfg_clipImage['clip_image']['working_directory'] @@ -60,7 +60,6 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # Start batching temp_result_folders = [] for batch in range(num_batchs): - batch = 1 start_time = time() print(f"Processing batch {batch+1} / {num_batchs}") @@ -115,8 +114,6 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): shutil.rmtree(os.path.join(WORKING_DIR, 
cfg_logReg['dev']['ndvi_directory'])) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['lum_directory'])) print(f"Time for batch: {round((time() - start_time)/60, 2)}min") - if batch == 1: - break # Merge results print("="*10 + "\nMERGING RESULTS...") From f03b27995522084a0728334f9af0d2be7afe0ae2 Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Wed, 15 Jan 2025 22:54:15 +0100 Subject: [PATCH 07/15] batching script working --- scripts/greenery.py | 48 ++++++++++++------------- scripts/infer_ml_batch.py | 33 +++++++++++++++++- scripts/roof_stats.py | 21 +++++------ scripts/tests_daks.py | 73 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 36 deletions(-) create mode 100644 scripts/tests_daks.py diff --git a/scripts/greenery.py b/scripts/greenery.py index e1bf192..b210a90 100644 --- a/scripts/greenery.py +++ b/scripts/greenery.py @@ -10,7 +10,6 @@ import pandas as pd import geopandas as gpd -import dask_geopandas as dg from time import time import fiona import rasterio @@ -139,31 +138,30 @@ def do_greenery(tile, shapes_roof, roofs): green_roofs_egid['EGID']=green_roofs_egid.index green_roofs_egid.index.names = ['Index'] - logger.info('Filtering for overhanging vegetation...') - try: - CHM = os.path.join(WORKING_DIR, CHM_LAYER) - print('starting to load CHM') - time_start = time() - CHM_GPD = gpd.read_file(CHM) - # CHM_GPD = dg.read_file(CHM, chunksize=100000) - # CHM_GPD = CHM_GPD.compute() - # CHM_GPD = CHM_GPD.calculate_spatial_partitions() - # small_bounds = green_roofs_egid.total_bounds - # CHM_GPD = CHM_GPD.cx[ - # small_bounds[0]:small_bounds[2], small_bounds[1]:small_bounds[3] - # ] - # CHM_GPD = CHM_GPD.compute() - # green_roofs_egid = CHM_GPD.overlay(green_roofs_egid, how='difference') - CHM_GPD['geometry'] = CHM_GPD.buffer(1) - green_roofs_egid['area_green'] = green_roofs_egid.area - print(f'finished to load CHM in {time() - time_start}sec') + # logger.info('Filtering for overhanging vegetation...') + # try: + # CHM = os.path.join(WORKING_DIR, CHM_LAYER) + # print('starting to load CHM') + # time_start = time() + # CHM_GPD = dg.read_file(CHM, chunksize=100000) + # # CHM_GPD = CHM_GPD.compute() + # CHM_GPD = CHM_GPD.calculate_spatial_partitions() + # small_bounds = green_roofs_egid.total_bounds + # CHM_GPD = CHM_GPD.cx[ + # small_bounds[0]:small_bounds[2], small_bounds[1]:small_bounds[3] + # ] + # CHM_GPD = CHM_GPD.compute() + # CHM_GPD['geometry'] = CHM_GPD.buffer(1) + # print(f'finished to load CHM in {time() - time_start}sec') - print('starting overlay') - time_start = time() - green_roofs_egid=gpd.overlay(green_roofs_egid, CHM_GPD, how='difference') - print(f'finished to overlay in {time() - time_start}sec') - except Exception as e: - logger.info(f"Error happened during overhanging veg filtering: {e}") + # print('starting overlay') + # time_start = time() + # green_roofs_egid=gpd.overlay(CHM_GPD, green_roofs_egid, how='difference') + # # green_roofs_egid = CHM_GPD.overlay(green_roofs_egid, how='difference') + # print(f'finished to overlay in {time() - time_start}sec') + # except Exception as e: + # logger.info(f"Error happened during overhanging veg filtering: {e}") + green_roofs_egid['area_green'] = green_roofs_egid.area logger.info('Join greenery on the roofs and vice-versa, saving...') diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index 932fd71..16ac072 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd 
import geopandas as gpd +import dask_geopandas as dg import shutil import yaml from tqdm import tqdm @@ -22,7 +23,6 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): CLS_ML = cfg_logReg['dev']['cls_ml'] MODEL_ML = cfg_logReg['dev']['model_ml'] AOI = gpd.read_file(os.path.join(WORKING_DIR,cfg_clipImage['clip_image']['inputs']['aoi'])) - num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1 # Create temp folder temp_storage = tempfile.mkdtemp() @@ -57,6 +57,37 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): else: interpretor_path = "./.venv/bin/python" + # Filtering for overhanging vegetation + CHM = cfg_logReg['dev']['chm_layer'] + print('Filtering for overhanging vegetation...') + # green_roofs_egid = gpd.read_file(os.path.join(WORKING_DIR, AOI)) + time_start = time() + CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000) + delayed_partitions = CHM_GPD.to_delayed() + results = [] + print(f"Length of AOI: {len(AOI)}") + + for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)): + # Compute the partition (convert to a GeoDataFrame) + partition_gdf = delayed_partition.compute() + + # Perform operation on the partition + AOI = gpd.overlay(AOI, partition_gdf,how='difference') + print(f"Length of AOI: {len(AOI)}") + + # Merging results into new roof file + # new_roofs = gpd.GeoDataFrame() + # for res in results: + # AOI = res if len(AOI) == 0 else gpd.GeoDataFrame(pd.concat([AOI, res], ignore_index=True)) + # print(f"Length of AOI: {len(AOI)}") + # AOI.drop_duplicates(inplace=True) + # print(f"Length of AOI: {len(AOI)}") + # new_roofs_src = os.path.join(temp_storage, "temp_roofs.gpkg") + # AOI.to_file(new_roofs_src, driver='GPKG') + + print(f'finished to process CHM in {time() - time_start}sec') + + num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1 # Start batching temp_result_folders = [] for batch in range(num_batchs): diff --git a/scripts/roof_stats.py b/scripts/roof_stats.py index 89de2e3..4e92d1b 100644 --- a/scripts/roof_stats.py +++ b/scripts/roof_stats.py @@ -141,16 +141,17 @@ def do_stats(roof): roofs.rename(columns={GREEN_CLS:'cls'}, inplace=True) roofs.rename(columns={GREEN_TAG:'green_tag'}, inplace=True) - if not 'green_roofs' in ROOFS_POLYGONS: - roofs['geometry'] = roofs.buffer(-1) - logger.info('Filtering for overhanging vegetation...') - roofs = roofs[roofs.geometry.is_empty==False] - CHM = os.path.join(WORKING_DIR, CHM_LAYER) - chm=gpd.read_file(CHM) - chm['geometry'] = chm.buffer(1) - roofs_chm=gpd.overlay(roofs, chm, how='difference') - else: - roofs_chm=roofs + # if not 'green_roofs' in ROOFS_POLYGONS: + # roofs['geometry'] = roofs.buffer(-1) + # logger.info('Filtering for overhanging vegetation...') + # roofs = roofs[roofs.geometry.is_empty==False] + # CHM = os.path.join(WORKING_DIR, CHM_LAYER) + # chm=gpd.read_file(CHM) + # chm['geometry'] = chm.buffer(1) + # roofs_chm=gpd.overlay(roofs, chm, how='difference') + # else: + # roofs_chm=roofs + roofs_chm=roofs if GT: logger.info('Defining training and test dataset...') diff --git a/scripts/tests_daks.py b/scripts/tests_daks.py new file mode 100644 index 0000000..5476940 --- /dev/null +++ b/scripts/tests_daks.py @@ -0,0 +1,73 @@ +import os, sys +import yaml +import argparse +from loguru import logger + +from joblib import Parallel, delayed +import multiprocessing +from threading import Lock +from tqdm_joblib import tqdm_joblib + +import pandas as pd +import geopandas as gpd +import dask_geopandas as dg +# from dask.distributed import Client +from time import time +import 
fiona +import rasterio +from rasterio.features import shapes +from shapely.geometry import shape +from tqdm import tqdm +import csv +from csv import writer + +if __name__ == '__main__': + ROOFS_POLYGONS = "sources/Footprint/ZH/00-AVBodenbedGeb_updated.gpkg" + CHM_LAYER = "sources/CHM/ZH/chm_ZH_total.shp" + WORKING_DIR = "D:/GitHubProjects/STDL_vegroof_production" + CHM = os.path.join(WORKING_DIR, CHM_LAYER) + + + print('starting to load roofs') + time_start = time() + green_roofs_egid = gpd.read_file(os.path.join(WORKING_DIR, ROOFS_POLYGONS)) + print(f'finished to load roofs in {time() - time_start}sec') + + print('starting to load CHM') + # Start a Dask client for computation + # client = Client() + time_start = time() + CHM_GPD = dg.read_file(CHM, chunksize=100000) + delayed_partitions = CHM_GPD.to_delayed() + results = [] + + for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)): + # Compute the partition (convert to a GeoDataFrame) + partition_gdf = delayed_partition.compute() + # print(partition_gdf.head()) + + # Perform your operation on the partition + # print(f"Processing partition with {len(partition_gdf)} rows") + results.append(gpd.overlay(partition_gdf, green_roofs_egid, how='difference')) + # results.append(len(partition_gdf)) + print(results) + + + + quit() + # CHM_GPD = CHM_GPD.compute() + CHM_GPD = CHM_GPD.calculate_spatial_partitions() + small_bounds = green_roofs_egid.total_bounds + CHM_GPD = CHM_GPD.cx[ + small_bounds[0]:small_bounds[2], small_bounds[1]:small_bounds[3] + ] + CHM_GPD = CHM_GPD.compute() + CHM_GPD['geometry'] = CHM_GPD.buffer(1) + print(f'finished to load CHM in {time() - time_start}sec') + + print('starting overlay') + time_start = time() + green_roofs_egid=gpd.overlay(CHM_GPD, green_roofs_egid, how='difference') + # green_roofs_egid = CHM_GPD.overlay(green_roofs_egid, how='difference') + print(f'finished to overlay in {time() - time_start}sec') + green_roofs_egid['area_green'] = green_roofs_egid.area \ No newline at end of file From 56a3a8d40497f60708181a2da08f4a3d2ce329e1 Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:21:48 +0100 Subject: [PATCH 08/15] small modifs on buffer and roof file for roof_stats.py --- scripts/clip_image.py | 2 +- scripts/infer_ml_batch.py | 11 +++++++---- scripts/roof_stats.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/clip_image.py b/scripts/clip_image.py index dc4aaef..0960bcb 100644 --- a/scripts/clip_image.py +++ b/scripts/clip_image.py @@ -58,7 +58,7 @@ # buffer every geometry by 50 units for index, row in tqdm(aoi.iterrows(), total=len(aoi), desc="Buffering geometries"): row = row.copy() - aoi.loc[index, 'geometry'] = row.geometry.buffer(50,join_style=2) + aoi.loc[index, 'geometry'] = row.geometry.buffer(1,join_style=2) aoi_clipped=fct_misc.clip_labels(labels_gdf=aoi, tiles_gdf=tiles, predicate_sjoin='intersects') aoi_clipped=aoi_clipped.reset_index(drop=True) diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index 16ac072..ecc4370 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -94,11 +94,10 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): start_time = time() print(f"Processing batch {batch+1} / {num_batchs}") - sub_AOI = AOI.iloc[BATCH_SIZE * batch: min(BATCH_SIZE * (batch + 1), len(AOI) - 1)] + sub_AOI = AOI.iloc[BATCH_SIZE * batch: min(BATCH_SIZE * (batch + 1), len(AOI))] sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpgk'), 
driver="GPKG") temp_cfg_logReg = deepcopy(cfg_logReg) - temp_cfg_logReg['dev']['roofs_file'] = 'derp' temp_cfg_logReg['dev']['roofs_file'] = os.path.join(temp_storage, 'sub_AOI.gpgk') temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml") with open(temp_cfg_logReg_dir, 'w') as outfile: @@ -111,7 +110,7 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): start_time_3 = time() print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") - # # Computing rasters + # Computing rasters subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) start_time_4 = time() print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") @@ -123,11 +122,15 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): with open(temp_cfg_logReg_dir, 'w') as outfile: yaml.dump(temp_cfg_logReg, outfile) - # # Greenery + # Greenery subprocess.run([interpretor_path, "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir]) start_time_5 = time() print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") + temp_cfg_logReg['dev']['roofs_file'] = os.path.join(temp_res_fold, '0_500_green_roofs.gpkg') + temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml") + with open(temp_cfg_logReg_dir, 'w') as outfile: + yaml.dump(temp_cfg_logReg, outfile) # Compute stats subprocess.run([interpretor_path, "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir]) diff --git a/scripts/roof_stats.py b/scripts/roof_stats.py index 4e92d1b..cc3eea0 100644 --- a/scripts/roof_stats.py +++ b/scripts/roof_stats.py @@ -171,7 +171,7 @@ def do_stats(roof): clipped_roofs=fct_misc.clip_labels(labels_gdf=roofs_chm, tiles_gdf=tiles, predicate_sjoin='within') - roofs_stats=pd.DataFrame() + roofs_stats=pd.DataFrame() calculated_stats=['min', 'max', 'mean', 'median', 'std'] BANDS={1: 'nir', 2: 'red', 3: 'green', 4: 'blue'} From c033fe64bce6a0945aa2daeb06197a689140e93f Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Sat, 18 Jan 2025 12:03:52 +0100 Subject: [PATCH 09/15] adding filtering of invalid geometries in clip_image --- config/clipImage.yaml | 23 +------------ scripts/clip_image.py | 8 +++++ scripts/infer_ml_batch.py | 68 ++++++++++++++++++++++----------------- scripts/roof_stats.py | 2 +- 4 files changed, 48 insertions(+), 53 deletions(-) diff --git a/config/clipImage.yaml b/config/clipImage.yaml index 4070637..24db239 100644 --- a/config/clipImage.yaml +++ b/config/clipImage.yaml @@ -1,14 +1,3 @@ -# clip_image: -# working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data_test -# inputs: -# ortho_directory: 01_initial/images/rs_tlm/tiles/ -# aoi: 01_initial/aoi/STDL_ZH_AOI.shp -# epsg: 'epsg:2056' -# predicate_sjoin: 'intersects' -# outputs: -# clip_ortho_directory: 02_intermediate/images/rs_tlm/tiles -# extent_ortho_directory: 01_initial/images/rs_tlm/extent - clip_image: working_directory: D:/GitHubProjects/STDL_vegroof_production inputs: @@ -16,16 +5,6 @@ clip_image: aoi: sources/Footprint/ZH/00-AVBodenbedGeb_updated.gpkg epsg: 'epsg:2056' outputs: + result_directory: ML/results clip_ortho_directory: ML/results/infer_moitie/tiles extent_ortho_directory: ML/results/infer_moitie/extent - - -# clip_image: -# working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data -# inputs: -# ortho_directory: 01_initial/images/infer_moitie/tiles/ -# aoi: 01_initial/images/infer_moitie/extent/extent.shp -# epsg: 'epsg:2056' -# outputs: -# clip_ortho_directory: 
02_intermediate/images/infer_moitie/tiles
-# extent_ortho_directory: 01_initial/images/infer_moitie/extent
\ No newline at end of file
diff --git a/scripts/clip_image.py b/scripts/clip_image.py
index 0960bcb..f798f29 100644
--- a/scripts/clip_image.py
+++ b/scripts/clip_image.py
@@ -43,6 +43,7 @@
     OUTPUTS=cfg['outputs']
     OUTPUT_DIR=OUTPUTS['clip_ortho_directory']
    TILE_DELIMITATION=OUTPUTS['extent_ortho_directory']
+    RESULT_DIR = OUTPUTS['result_directory']
 
     os.chdir(WORKING_DIR)
     fct_misc.ensure_dir_exists(OUTPUT_DIR)
@@ -53,6 +54,12 @@
     logger.info('Reading AOI geometries...')
     aoi = gpd.read_file(AOI)
 
+    # filter out invalid geometries
+    invalid_samples = aoi.loc[~aoi.geometry.is_valid]
+    aoi = aoi.loc[aoi.geometry.is_valid]
+    invalid_samples.to_file(os.path.join(RESULT_DIR, 'invalid_samples.gpkg'), driver='GPKG')
+    aoi.to_file(os.path.join(RESULT_DIR, 'valid_samples.gpkg'), driver='GPKG')
+
     # keep only the geometry column
     aoi = aoi.filter(['geometry'])
     # buffer every geometry by 50 units
@@ -60,6 +67,7 @@
         row = row.copy()
         aoi.loc[index, 'geometry'] = row.geometry.buffer(1,join_style=2)
 
+
     aoi_clipped=fct_misc.clip_labels(labels_gdf=aoi, tiles_gdf=tiles, predicate_sjoin='intersects')
     aoi_clipped=aoi_clipped.reset_index(drop=True)
 
diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py
index ecc4370..eae39ec 100644
--- a/scripts/infer_ml_batch.py
+++ b/scripts/infer_ml_batch.py
@@ -16,7 +16,7 @@
 from copy import deepcopy
 import platform
 
-BATCH_SIZE = 10
+BATCH_SIZE = 5000
 
 def infer_ml_batch(cfg_clipImage, cfg_logReg):
     WORKING_DIR = cfg_clipImage['clip_image']['working_directory']
@@ -64,7 +64,6 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
     time_start = time()
     CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000)
     delayed_partitions = CHM_GPD.to_delayed()
-    results = []
     print(f"Length of AOI: {len(AOI)}")
 
     for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)):
@@ -75,74 +74,83 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
         AOI = gpd.overlay(AOI, partition_gdf,how='difference')
         print(f"Length of AOI: {len(AOI)}")
 
-    # Merging results into new roof file
-    # new_roofs = gpd.GeoDataFrame()
-    # for res in results:
-    #     AOI = res if len(AOI) == 0 else gpd.GeoDataFrame(pd.concat([AOI, res], ignore_index=True))
-    #     print(f"Length of AOI: {len(AOI)}")
-    # AOI.drop_duplicates(inplace=True)
-    # print(f"Length of AOI: {len(AOI)}")
-    # new_roofs_src = os.path.join(temp_storage, "temp_roofs.gpkg")
-    # AOI.to_file(new_roofs_src, driver='GPKG')
     print(f'finished to process CHM in {time() - time_start}sec')
 
     num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1
 
     # Start batching
     temp_result_folders = []
+    invalid_geoms = []
     for batch in range(num_batchs):
+        if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]:
+            continue
         start_time = time()
-        print(f"Processing batch {batch+1} / {num_batchs}")
+        print(f"Processing batch {batch} / {num_batchs - 1}")
 
+        # Select roofs to process
         sub_AOI = AOI.iloc[BATCH_SIZE * batch: min(BATCH_SIZE * (batch + 1), len(AOI))]
-        sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpgk'), driver="GPKG")
+        sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpkg'), driver="GPKG")
 
+        # Change result folder
+        batch_res_fold = cfg_logReg['dev']['results_directory'] + f"/results_batch{batch}/"
+        temp_result_folders.append(batch_res_fold)
+        # temp_cfg_logReg['dev']['results_directory'] = batch_res_fold
+        # with open(temp_cfg_logReg_dir, 'w') as outfile:
+        #     yaml.dump(temp_cfg_logReg, outfile)
+
+        # Create temp cfg files
+        # _clipImage
+        temp_cfg_clipImage = deepcopy(cfg_clipImage)
+        temp_cfg_clipImage['clip_image']['inputs']['aoi'] = os.path.join(temp_storage, 'sub_AOI.gpkg')
+        temp_cfg_clipImage['clip_image']['outputs']['result_directory'] = batch_res_fold
+        temp_cfg_clipImage_dir = os.path.join(temp_storage, "clipImage.yaml")
+        with open(temp_cfg_clipImage_dir, 'w') as outfile:
+            yaml.dump(temp_cfg_clipImage, outfile)
+
+        # _logReg
         temp_cfg_logReg = deepcopy(cfg_logReg)
-        temp_cfg_logReg['dev']['roofs_file'] = os.path.join(temp_storage, 'sub_AOI.gpgk')
+        temp_cfg_logReg['dev']['roofs_file'] = os.path.join(batch_res_fold, 'valid_samples.gpkg')
+        temp_cfg_logReg['dev']['results_directory'] = batch_res_fold
         temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml")
         with open(temp_cfg_logReg_dir, 'w') as outfile:
             yaml.dump(temp_cfg_logReg, outfile)
-
-        # Clipping images
+
+
+        # Call subprocesses
+        # _Clipping images
         start_time_2 = time()
         print(f"Time for loading initial stuff: {round((start_time_2 - start_time)/60, 2)}min")
-        subprocess.run([interpretor_path, "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage])
+        subprocess.run([interpretor_path, "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage_dir])
         start_time_3 = time()
         print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min")
 
-        # Computing rasters
+        # _Computing rasters
         subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir])
         start_time_4 = time()
         print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min")
 
-        # Change result folder
-        temp_res_fold = cfg_logReg['dev']['results_directory'] + f"/results_batch{batch}/"
-        temp_result_folders.append(temp_res_fold)
-        temp_cfg_logReg['dev']['results_directory'] = temp_res_fold
-        with open(temp_cfg_logReg_dir, 'w') as outfile:
-            yaml.dump(temp_cfg_logReg, outfile)
-
-        # Greenery
+        # _Greenery
         subprocess.run([interpretor_path, "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir])
         start_time_5 = time()
         print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min")
 
-        temp_cfg_logReg['dev']['roofs_file'] = os.path.join(temp_res_fold, '0_500_green_roofs.gpkg')
+        temp_cfg_logReg['dev']['roofs_file'] = os.path.join(batch_res_fold, '0_500_green_roofs.gpkg')
         temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml")
         with open(temp_cfg_logReg_dir, 'w') as outfile:
             yaml.dump(temp_cfg_logReg, outfile)
 
-        # Compute stats
+        # _Compute stats
         subprocess.run([interpretor_path, "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir])
         start_time_6 = time()
         print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min")
 
-        # Do inference
+        # _Do inference
         subprocess.run([interpretor_path, "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg_dir])
         start_time_7 = time()
         print(f"Time for inference script: {round((start_time_7 - start_time_6)/60, 2)}min")
 
-        os.remove(os.path.join(temp_storage, 'sub_AOI.gpgk'))
+        # Clean temp architecture
+        os.remove(os.path.join(temp_storage, 'sub_AOI.gpkg'))
         os.remove(temp_cfg_logReg_dir)
         shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory']))
         shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ndvi_directory']))
diff --git a/scripts/roof_stats.py b/scripts/roof_stats.py
index cc3eea0..4e92d1b 100644
--- a/scripts/roof_stats.py
+++ b/scripts/roof_stats.py
@@ -171,7 +171,7 @@ def do_stats(roof):
 
     clipped_roofs=fct_misc.clip_labels(labels_gdf=roofs_chm, tiles_gdf=tiles, predicate_sjoin='within')
 
-    roofs_stats=pd.DataFrame()
+    roofs_stats=pd.DataFrame()
     calculated_stats=['min', 'max', 'mean', 'median', 'std']
 
     BANDS={1: 'nir', 2: 'red', 3: 'green', 4: 'blue'}

From 6e3b4a633e2db768d2d3f6ab18e44004e6ec82d6 Mon Sep 17 00:00:00 2001
From: destoswa <84578314+destoswa@users.noreply.github.com>
Date: Mon, 20 Jan 2025 15:06:02 +0100
Subject: [PATCH 10/15] tests on overlay vs clip_image. problem of invalid samples

---
 scripts/functions/fct_misc.py |  3 +--
 scripts/infer_ml_batch.py     | 39 ++++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/scripts/functions/fct_misc.py b/scripts/functions/fct_misc.py
index 4835d95..c34fa3b 100644
--- a/scripts/functions/fct_misc.py
+++ b/scripts/functions/fct_misc.py
@@ -6,7 +6,7 @@
 import numpy as np
 import geopandas as gpd
 import pandas as pd
-from shapely.geometry import mapping, shape
+from shapely.geometry import mapping, shape, MultiPolygon
 from shapely.affinity import scale
 
 import rasterio
@@ -80,7 +80,6 @@ def clip_labels(labels_gdf: gpd.GeoDataFrame, tiles_gdf: gpd.GeoDataFrame, predi
     labels_tiles_sjoined_gdf = gpd.sjoin(labels_gdf, tiles_gdf, how='inner', predicate=predicate_sjoin)
 
     def clip_row(row, fact=fact):
-
         old_geo = row.geometry
         scaled_tile_geo = scale(row.tile_geometry, xfact=fact, yfact=fact)
         new_geo = old_geo.intersection(scaled_tile_geo)

diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py
index eae39ec..d6129c0 100644
--- a/scripts/infer_ml_batch.py
+++ b/scripts/infer_ml_batch.py
@@ -15,6 +15,7 @@
 import functions.fct_misc as fct_misc
 from copy import deepcopy
 import platform
+from shapely.geometry import MultiPolygon
 
 BATCH_SIZE = 5000
 
@@ -58,21 +59,37 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
     else:
         interpretor_path = "./.venv/bin/python"
 
     # Filtering for overhanging vegetation
+    def to_multipolygon(geometry):
+        if geometry.geom_type == "Polygon":
+            return MultiPolygon([geometry])
+        elif geometry.geom_type == "MultiPolygon":
+            return geometry
+        else:
+            raise ValueError("No geometries after overlay!!!")
+        return None # Handle unexpected geometry types if needed
+
     CHM = cfg_logReg['dev']['chm_layer']
     print('Filtering for overhanging vegetation...')
     # green_roofs_egid = gpd.read_file(os.path.join(WORKING_DIR, AOI))
     time_start = time()
     CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000)
     delayed_partitions = CHM_GPD.to_delayed()
-    print(f"Length of AOI: {len(AOI)}")
-
+    print(f"1 - Length of AOI: {len(AOI)}")
+    AOI.to_file(os.path.join(WORKING_DIR, "test_original_aoi.gpkg"), driver="GPKG")
+    AOI = AOI.loc[AOI.geometry.is_valid]
+    print(f"2 - Length of AOI: {len(AOI)}")
+    AOI.to_file(os.path.join(WORKING_DIR, "test_valid_aoi.gpkg"), driver="GPKG")
     for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)):
         # Compute the partition (convert to a GeoDataFrame)
         partition_gdf = delayed_partition.compute()
-
+
         # Perform operation on the partition
-        AOI = gpd.overlay(AOI, partition_gdf,how='difference')
-        print(f"Length of AOI: {len(AOI)}")
+        AOI = gpd.overlay(AOI, partition_gdf, how='difference', keep_geom_type=True)
+        AOI['geometry'] = AOI['geometry'].apply(to_multipolygon)
+        print(f"3 - Length of AOI: {len(AOI)}")
+        AOI = AOI.loc[AOI.geometry.is_valid]
+        print(f"4 - Length of AOI: {len(AOI)}")
+    AOI.to_file(os.path.join(WORKING_DIR, "test_overlayed_aoi.gpkg"), driver="GPKG")
 
 
     print(f'finished to process CHM in {time() - time_start}sec')
 
@@ -80,9 +97,9 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
     num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1
 
     # Start batching
     temp_result_folders = []
-    invalid_geoms = []
     for batch in range(num_batchs):
-        if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]:
+        # if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]:
+        if batch != 4:
             continue
         start_time = time()
         print(f"Processing batch {batch} / {num_batchs - 1}")
@@ -92,8 +109,10 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
         sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpkg'), driver="GPKG")
 
         # Change result folder
-        batch_res_fold = cfg_logReg['dev']['results_directory'] + f"/results_batch{batch}/"
+        batch_res_fold = os.path.join(WORKING_DIR, cfg_logReg['dev']['results_directory']) + f"/results_batch{batch}/"
         temp_result_folders.append(batch_res_fold)
+        if not os.path.exists(batch_res_fold):
+            os.mkdir(batch_res_fold)
         # temp_cfg_logReg['dev']['results_directory'] = batch_res_fold
         # with open(temp_cfg_logReg_dir, 'w') as outfile:
         #     yaml.dump(temp_cfg_logReg, outfile)
@@ -193,6 +212,10 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
     # df_results.to_file(os.path.join(WORKING_DIR, "ML/results_GE", 'results.gpkg'), driver="GPKG", index=False)
     # quit()
+    # original = gpd.read_file(r'D:\GitHubProjects\STDL_vegroof_production\test_original_aoi.gpkg')
+    # valid = gpd.read_file(r'D:\GitHubProjects\STDL_vegroof_production\test_valid_aoi.gpkg')
+    # overlayed = gpd.read_file(r'D:\GitHubProjects\STDL_vegroof_production\test_overlayed_aoi.gpkg')
+    # quit()
 
     # load input parameters

From d54f33caeee4213be661e84fa866b99e424b827c Mon Sep 17 00:00:00 2001
From: destoswa <84578314+destoswa@users.noreply.github.com>
Date: Mon, 20 Jan 2025 16:04:36 +0100
Subject: [PATCH 11/15] Insert overlay inside batch iteration

---
 scripts/infer_ml_batch.py | 90 ++++++++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 38 deletions(-)

diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py
index d6129c0..a0048fe 100644
--- a/scripts/infer_ml_batch.py
+++ b/scripts/infer_ml_batch.py
@@ -58,41 +58,41 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
     else:
         interpretor_path = "./.venv/bin/python"
 
-    # Filtering for overhanging vegetation
-    def to_multipolygon(geometry):
-        if geometry.geom_type == "Polygon":
-            return MultiPolygon([geometry])
-        elif geometry.geom_type == "MultiPolygon":
-            return geometry
-        else:
-            raise ValueError("No geometries after overlay!!!")
-        return None # Handle unexpected geometry types if needed
-
-    CHM = cfg_logReg['dev']['chm_layer']
-    print('Filtering for overhanging vegetation...')
-    # green_roofs_egid = gpd.read_file(os.path.join(WORKING_DIR, AOI))
-    time_start = time()
-    CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000)
-    delayed_partitions = CHM_GPD.to_delayed()
-    print(f"1 - Length of AOI: {len(AOI)}")
-    AOI.to_file(os.path.join(WORKING_DIR, "test_original_aoi.gpkg"), driver="GPKG")
-    AOI = AOI.loc[AOI.geometry.is_valid]
-    print(f"2 - Length of AOI: {len(AOI)}")
-    AOI.to_file(os.path.join(WORKING_DIR, "test_valid_aoi.gpkg"), driver="GPKG")
-    for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)):
-        # Compute the partition (convert to a GeoDataFrame)
-        partition_gdf = delayed_partition.compute()
-
-        # Perform operation on the partition
-        AOI = gpd.overlay(AOI, partition_gdf, how='difference', keep_geom_type=True)
-        AOI['geometry'] = AOI['geometry'].apply(to_multipolygon)
-        print(f"3 - Length of AOI: {len(AOI)}")
AOI: {len(AOI)}") - AOI = AOI.loc[AOI.geometry.is_valid] - print(f"4 - Length of AOI: {len(AOI)}") - AOI.to_file(os.path.join(WORKING_DIR, "test_overlayed_aoi.gpkg"), driver="GPKG") - - - print(f'finished to process CHM in {time() - time_start}sec') + # # Filtering for overhanging vegetation + # def to_multipolygon(geometry): + # if geometry.geom_type == "Polygon": + # return MultiPolygon([geometry]) + # elif geometry.geom_type == "MultiPolygon": + # return geometry + # else: + # raise ValueError("No geometries after overlay!!!") + # return None # Handle unexpected geometry types if needed + + # CHM = cfg_logReg['dev']['chm_layer'] + # print('Filtering for overhanging vegetation...') + # # green_roofs_egid = gpd.read_file(os.path.join(WORKING_DIR, AOI)) + # time_start = time() + # CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000) + # delayed_partitions = CHM_GPD.to_delayed() + # print(f"1 - Length of AOI: {len(AOI)}") + # AOI.to_file(os.path.join(WORKING_DIR, "test_original_aoi.gpkg"), driver="GPKG") + # AOI = AOI.loc[AOI.geometry.is_valid] + # print(f"2 - Length of AOI: {len(AOI)}") + # AOI.to_file(os.path.join(WORKING_DIR, "test_valid_aoi.gpkg"), driver="GPKG") + # for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)): + # # Compute the partition (convert to a GeoDataFrame) + # partition_gdf = delayed_partition.compute() + + # # Perform operation on the partition + # AOI = gpd.overlay(AOI, partition_gdf, how='difference', keep_geom_type=True) + # AOI['geometry'] = AOI['geometry'].apply(to_multipolygon) + # print(f"3 - Length of AOI: {len(AOI)}") + # AOI = AOI.loc[AOI.geometry.is_valid] + # print(f"4 - Length of AOI: {len(AOI)}") + # AOI.to_file(os.path.join(WORKING_DIR, "test_overlayed_aoi.gpkg"), driver="GPKG") + + + # print(f'finished to process CHM in {time() - time_start}sec') num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1 # Start batching @@ -113,9 +113,6 @@ def to_multipolygon(geometry): temp_result_folders.append(batch_res_fold) if not os.path.exists(batch_res_fold): os.mkdir(batch_res_fold) - # temp_cfg_logReg['dev']['results_directory'] = batch_res_fold - # with open(temp_cfg_logReg_dir, 'w') as outfile: - # yaml.dump(temp_cfg_logReg, outfile) # Create temp cfg files # _clipImage @@ -137,18 +134,33 @@ def to_multipolygon(geometry): # Call subprocesses # _Clipping images + print("Clipping images") start_time_2 = time() print(f"Time for loading initial stuff: {round((start_time_2 - start_time)/60, 2)}min") subprocess.run([interpretor_path, "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage_dir]) start_time_3 = time() print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") + # Overlay on CHM + print("Overlaying with CHM") + CHM = cfg_logReg['dev']['chm_layer'] + CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000) + delayed_partitions = CHM_GPD.to_delayed() + for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions), desc="Overlaying"): + # Compute the partition (convert to a GeoDataFrame) + partition_gdf = delayed_partition.compute() + + # Perform operation on the partition + sub_AOI = gpd.overlay(sub_AOI, partition_gdf, how='difference', keep_geom_type=True) + # _Computing rasters + print("Computing rasters") subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) start_time_4 = time() print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") # _Greenery 
+ print("Computing greenery") subprocess.run([interpretor_path, "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir]) start_time_5 = time() print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") @@ -159,11 +171,13 @@ def to_multipolygon(geometry): yaml.dump(temp_cfg_logReg, outfile) # _Compute stats + print("Computing stats") subprocess.run([interpretor_path, "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir]) start_time_6 = time() print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min") # _Do inference + print("Infering") subprocess.run([interpretor_path, "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg_dir]) start_time_7 = time() print(f"Time for inference script: {round((start_time_7 - start_time_6)/60, 2)}min") From a0fcd10ffb78ac65dc1bf3ee774046e1de66c375 Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:11:58 +0100 Subject: [PATCH 12/15] small change to save the overlayed sub_AOI onwhich to work --- scripts/infer_ml_batch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index a0048fe..e52e845 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -99,7 +99,7 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): temp_result_folders = [] for batch in range(num_batchs): # if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]: - if batch != 4: + if batch != 1: continue start_time = time() print(f"Processing batch {batch} / {num_batchs - 1}") @@ -131,7 +131,6 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): with open(temp_cfg_logReg_dir, 'w') as outfile: yaml.dump(temp_cfg_logReg, outfile) - # Call subprocesses # _Clipping images print("Clipping images") @@ -152,6 +151,7 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # Perform operation on the partition sub_AOI = gpd.overlay(sub_AOI, partition_gdf, how='difference', keep_geom_type=True) + sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpkg'), driver="GPKG") # _Computing rasters print("Computing rasters") From bb85841c7e59583c6572ab295df6671c1110a59d Mon Sep 17 00:00:00 2001 From: destoswa Date: Wed, 22 Jan 2025 09:30:05 +0000 Subject: [PATCH 13/15] only merging --- scripts/infer_ml_batch.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index a0048fe..b2edf61 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -98,8 +98,12 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # Start batching temp_result_folders = [] for batch in range(num_batchs): - # if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]: - if batch != 4: + + batch_res_fold = os.path.join(WORKING_DIR, cfg_logReg['dev']['results_directory']) + f"/results_batch{batch}/" + temp_result_folders.append(batch_res_fold) + continue + if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]: + # if batch != 4: continue start_time = time() print(f"Processing batch {batch} / {num_batchs - 1}") @@ -141,6 +145,12 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): start_time_3 = time() print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") + # _Computing rasters + print("Computing rasters") + subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", 
temp_cfg_logReg_dir]) + start_time_4 = time() + print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") + # Overlay on CHM print("Overlaying with CHM") CHM = cfg_logReg['dev']['chm_layer'] @@ -152,12 +162,7 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # Perform operation on the partition sub_AOI = gpd.overlay(sub_AOI, partition_gdf, how='difference', keep_geom_type=True) - - # _Computing rasters - print("Computing rasters") - subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) - start_time_4 = time() - print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") + sub_AOI.to_file(os.path.join(temp_storage, 'sub_AOI.gpkg'), driver="GPKG") # _Greenery print("Computing greenery") @@ -189,11 +194,11 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ndvi_directory'])) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['lum_directory'])) print(f"Time for batch: {round((time() - start_time)/60, 2)}min") - + print("List of batchs to merge: ", temp_result_folders) # Merge results print("="*10 + "\nMERGING RESULTS...") df_results = gpd.GeoDataFrame() - for res_dir in temp_result_folders: + for _, res_dir in tqdm(enumerate(temp_result_folders), total=len(temp_result_folders), desc='Merging'): df_sub_res = gpd.read_file(os.path.join(WORKING_DIR, res_dir, 'inf_' + CLS_ML + '_' + MODEL_ML + '.gpkg')) df_results = df_sub_res if len(df_results) == 0 else gpd.GeoDataFrame(pd.concat([df_results, df_sub_res], ignore_index=True)) From 012d78ab27c8fd911bcf531b957f3fa71e8dea84 Mon Sep 17 00:00:00 2001 From: destoswa <84578314+destoswa@users.noreply.github.com> Date: Thu, 23 Jan 2025 08:58:24 +0100 Subject: [PATCH 14/15] cleaning code and putting back overlay in greenery and roof_stats with flag in config --- config/logReg.yaml | 1 + scripts/greenery.py | 31 +++-------- scripts/infer_ml_batch.py | 113 ++++++++------------------------------ scripts/roof_stats.py | 22 ++++---- 4 files changed, 43 insertions(+), 124 deletions(-) diff --git a/config/logReg.yaml b/config/logReg.yaml index cb2cf15..041f0ef 100644 --- a/config/logReg.yaml +++ b/config/logReg.yaml @@ -22,6 +22,7 @@ dev: model_ml: 'LR' # 'LR' 'RF' trained_model_dir: ML/models epsg: 'epsg:2056' + do_overlay: False diff --git a/scripts/greenery.py b/scripts/greenery.py index b210a90..f24b23e 100644 --- a/scripts/greenery.py +++ b/scripts/greenery.py @@ -87,6 +87,7 @@ def do_greenery(tile, shapes_roof, roofs): TH_NDVI=cfg['th_ndvi'] TH_LUM=cfg['th_lum'] EPSG=cfg['epsg'] + DO_OVERLAY = cfg['do_overlay'] os.chdir(WORKING_DIR) @@ -138,29 +139,13 @@ def do_greenery(tile, shapes_roof, roofs): green_roofs_egid['EGID']=green_roofs_egid.index green_roofs_egid.index.names = ['Index'] - # logger.info('Filtering for overhanging vegetation...') - # try: - # CHM = os.path.join(WORKING_DIR, CHM_LAYER) - # print('starting to load CHM') - # time_start = time() - # CHM_GPD = dg.read_file(CHM, chunksize=100000) - # # CHM_GPD = CHM_GPD.compute() - # CHM_GPD = CHM_GPD.calculate_spatial_partitions() - # small_bounds = green_roofs_egid.total_bounds - # CHM_GPD = CHM_GPD.cx[ - # small_bounds[0]:small_bounds[2], small_bounds[1]:small_bounds[3] - # ] - # CHM_GPD = CHM_GPD.compute() - # CHM_GPD['geometry'] = CHM_GPD.buffer(1) - # print(f'finished to load CHM in {time() - time_start}sec') - - # print('starting overlay') - # time_start = time() - # green_roofs_egid=gpd.overlay(CHM_GPD, green_roofs_egid, 
how='difference') - # # green_roofs_egid = CHM_GPD.overlay(green_roofs_egid, how='difference') - # print(f'finished to overlay in {time() - time_start}sec') - # except Exception as e: - # logger.info(f"Error happened during overhanging veg filtering: {e}") + if DO_OVERLAY: + logger.info('Filtering for overhanging vegetation...') + CHM = os.path.join(WORKING_DIR, CHM_LAYER) + CHM_GPD=gpd.read_file(CHM) + CHM_GPD['geometry'] = CHM_GPD.buffer(1) + green_roofs_egid=gpd.overlay(green_roofs_egid, CHM_GPD, how='difference') + green_roofs_egid['area_green'] = green_roofs_egid.area diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py index c9a12b9..b366e78 100644 --- a/scripts/infer_ml_batch.py +++ b/scripts/infer_ml_batch.py @@ -7,7 +7,6 @@ import shutil import yaml from tqdm import tqdm -import argparse from time import time import subprocess import tempfile @@ -15,7 +14,6 @@ import functions.fct_misc as fct_misc from copy import deepcopy import platform -from shapely.geometry import MultiPolygon BATCH_SIZE = 5000 @@ -38,7 +36,6 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): OUTPUTS=cfg_clipImage['clip_image']['outputs'] OUTPUT_DIR=OUTPUTS['clip_ortho_directory'] - # os.chdir(WORKING_DIR) fct_misc.ensure_dir_exists(OUTPUT_DIR) ORTHO_DIR=cfg_clipImage['clip_image']['inputs']['ortho_directory'] @@ -58,55 +55,13 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): else: interpretor_path = "./.venv/bin/python" - # # Filtering for overhanging vegetation - # def to_multipolygon(geometry): - # if geometry.geom_type == "Polygon": - # return MultiPolygon([geometry]) - # elif geometry.geom_type == "MultiPolygon": - # return geometry - # else: - # raise ValueError("No geometries after overlay!!!") - # return None # Handle unexpected geometry types if needed - - # CHM = cfg_logReg['dev']['chm_layer'] - # print('Filtering for overhanging vegetation...') - # # green_roofs_egid = gpd.read_file(os.path.join(WORKING_DIR, AOI)) - # time_start = time() - # CHM_GPD = dg.read_file(os.path.join(WORKING_DIR, CHM), chunksize=100000) - # delayed_partitions = CHM_GPD.to_delayed() - # print(f"1 - Length of AOI: {len(AOI)}") - # AOI.to_file(os.path.join(WORKING_DIR, "test_original_aoi.gpkg"), driver="GPKG") - # AOI = AOI.loc[AOI.geometry.is_valid] - # print(f"2 - Length of AOI: {len(AOI)}") - # AOI.to_file(os.path.join(WORKING_DIR, "test_valid_aoi.gpkg"), driver="GPKG") - # for _, delayed_partition in tqdm(enumerate(delayed_partitions), total=len(delayed_partitions)): - # # Compute the partition (convert to a GeoDataFrame) - # partition_gdf = delayed_partition.compute() - - # # Perform operation on the partition - # AOI = gpd.overlay(AOI, partition_gdf, how='difference', keep_geom_type=True) - # AOI['geometry'] = AOI['geometry'].apply(to_multipolygon) - # print(f"3 - Length of AOI: {len(AOI)}") - # AOI = AOI.loc[AOI.geometry.is_valid] - # print(f"4 - Length of AOI: {len(AOI)}") - # AOI.to_file(os.path.join(WORKING_DIR, "test_overlayed_aoi.gpkg"), driver="GPKG") - - - # print(f'finished to process CHM in {time() - time_start}sec') - num_batchs = int(len(AOI) / BATCH_SIZE - 1) + 1 + # Start batching temp_result_folders = [] for batch in range(num_batchs): - - batch_res_fold = os.path.join(WORKING_DIR, cfg_logReg['dev']['results_directory']) + f"/results_batch{batch}/" - temp_result_folders.append(batch_res_fold) - continue - if batch not in [1, 4, 5, 6, 7, 8, 11, 12, 13, 21, 24, 27, 35, 40, 42, 46, 52, 53, 54, 55, 59, 60, 63]: - # if batch != 4: - continue - start_time = time() print(f"Processing batch {batch} / 
{num_batchs - 1}") + start_time = time() # Select roofs to process sub_AOI = AOI.iloc[BATCH_SIZE * batch: min(BATCH_SIZE * (batch + 1), len(AOI))] @@ -131,6 +86,7 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): temp_cfg_logReg = deepcopy(cfg_logReg) temp_cfg_logReg['dev']['roofs_file'] = os.path.join(batch_res_fold, 'valid_samples.gpkg') temp_cfg_logReg['dev']['results_directory'] = batch_res_fold + temp_cfg_logReg['dev']['do_overlay'] = False temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml") with open(temp_cfg_logReg_dir, 'w') as outfile: yaml.dump(temp_cfg_logReg, outfile) @@ -138,17 +94,15 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # Call subprocesses # _Clipping images print("Clipping images") - start_time_2 = time() - print(f"Time for loading initial stuff: {round((start_time_2 - start_time)/60, 2)}min") + time_1 = time() subprocess.run([interpretor_path, "./scripts/clip_image.py", '-cfg', temp_cfg_clipImage_dir]) - start_time_3 = time() - print(f"Time for clip_image script: {round((start_time_3 - start_time_2)/60, 2)}min") + time_2 = time() + print(f"Time for clip_image script: {round((time_2 - time_1)/60, 2)}min") # _Computing rasters print("Computing rasters") subprocess.run([interpretor_path, "./scripts/calculate_raster.py", "-cfg", temp_cfg_logReg_dir]) - start_time_4 = time() - print(f"Time for calculate_raster script: {round((start_time_4 - start_time_3)/60, 2)}min") + print(f"Time for calculate_raster script: {round((time() - time_2)/60, 2)}min") # Overlay on CHM print("Overlaying with CHM") @@ -165,9 +119,10 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # _Greenery print("Computing greenery") + time_1 = time() subprocess.run([interpretor_path, "./scripts/greenery.py", "-cfg", temp_cfg_logReg_dir]) - start_time_5 = time() - print(f"Time for greenery script: {round((start_time_5 - start_time_4)/60, 2)}min") + time_2 = time() + print(f"Time for greenery script: {round((time_2 - time_1)/60, 2)}min") temp_cfg_logReg['dev']['roofs_file'] = os.path.join(batch_res_fold, '0_500_green_roofs.gpkg') temp_cfg_logReg_dir = os.path.join(temp_storage, "logRes.yaml") @@ -177,14 +132,14 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): # _Compute stats print("Computing stats") subprocess.run([interpretor_path, "./scripts/roof_stats.py", "-cfg", temp_cfg_logReg_dir]) - start_time_6 = time() - print(f"Time for roof_stats script: {round((start_time_6 - start_time_5)/60, 2)}min") + time_1 = time() + print(f"Time for roof_stats script: {round((time_1 - time_2)/60, 2)}min") # _Do inference print("Infering") subprocess.run([interpretor_path, "./scripts/infer_ml.py", "-cfg", temp_cfg_logReg_dir]) - start_time_7 = time() - print(f"Time for inference script: {round((start_time_7 - start_time_6)/60, 2)}min") + time_2 = time() + print(f"Time for inference script: {round((time_2 - time_1)/60, 2)}min") # Clean temp architecture os.remove(os.path.join(temp_storage, 'sub_AOI.gpkg')) @@ -192,8 +147,15 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg): shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ortho_directory'])) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['ndvi_directory'])) shutil.rmtree(os.path.join(WORKING_DIR, cfg_logReg['dev']['lum_directory'])) - print(f"Time for batch: {round((time() - start_time)/60, 2)}min") - print("List of batchs to merge: ", temp_result_folders) + + # Print time for batch + time_elapsed = time() - start_time + n_hours = int(time_elapsed / 3600) + n_min = int((time_elapsed % 3600) / 60) + n_sec = int(time_elapsed - n_hours * 
3600 - n_min * 60)
+        print(f'Time for batch: {n_hours}:{n_min}:{n_sec}\n')
+        print("=" * 20 + "\n")
+
     # Merge results
     print("="*10 + "\nMERGING RESULTS...")
     df_results = gpd.GeoDataFrame()
     for _, res_dir in tqdm(enumerate(temp_result_folders), total=len(temp_result_folders), desc='Merging'):
         df_sub_res = gpd.read_file(os.path.join(WORKING_DIR, res_dir, 'inf_' + CLS_ML + '_' + MODEL_ML + '.gpkg'))
         df_results = df_sub_res if len(df_results) == 0 else gpd.GeoDataFrame(pd.concat([df_results, df_sub_res], ignore_index=True))

From 745fb41b47cde3ee8814782ef0d4cade11560685 Mon Sep 17 00:00:00 2001
From: destoswa <84578314+destoswa@users.noreply.github.com>
Date: Thu, 23 Jan 2025 10:02:21 +0100
Subject: [PATCH 15/15] small changes

---
 config/logReg.yaml        | 30 ------------------------------
 scripts/infer_ml_batch.py | 11 ++++++++++-
 2 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/config/logReg.yaml b/config/logReg.yaml
index 041f0ef..4316041 100644
--- a/config/logReg.yaml
+++ b/config/logReg.yaml
@@ -23,33 +23,3 @@ dev:
   trained_model_dir: ML/models
   epsg: 'epsg:2056'
   do_overlay: False
-
-
-
-# hydra:
-#   run:
-#     dir: 02_intermediate/th/${now:%Y-%m-%d}/${now:%H-%M-%S}
-
-# dev:
-#   working_directory: C:/Users/cmarmy/Documents/STDL/proj-vegroofs/data
-#   ortho_directory: 02_intermediate/images/infer_moitie/tiles
-#   tile_delimitation: 02_intermediate/images/infer_moitie/extent/
-#   ndvi_directory: 02_intermediate/images/infer_moitie/ndvi
-#   lum_directory: 02_intermediate/images/infer_moitie/lum
-#   roofs_file: 02_intermediate/th/2024-08-15/09-12-47/0_500_green_roofs.shp # 02_intermediate/gt/inf_roofs.gpkg #
-#   roofs_layer:
-#   gt: False
-#   green_tag: 'veg_new_3'
-#   green_cls: 'class_3'
-#   chm_layer: 02_intermediate/autres/CHM_AOI_inf.gpkg
-#   results_directory: 03_results/infer_moitie/
-#   egid_train_test: egid_train_test_gt.csv
-#   th_ndvi: 0 # no thresholding -1
-#   th_lum: 500 # no thresholding 765 or 210000
-#   cls_ml: 'binary' # 'binary' 'multi' 'multi_aggreg'
-#   model_ml: 'LR' # 'LR' 'RF'
-#   trained_model_dir: 03_results/scratch_gt/
-#   epsg: 'epsg:2056'
-
-
-
diff --git a/scripts/infer_ml_batch.py b/scripts/infer_ml_batch.py
index b366e78..bd2a7ea 100644
--- a/scripts/infer_ml_batch.py
+++ b/scripts/infer_ml_batch.py
@@ -15,7 +15,7 @@
 from copy import deepcopy
 import platform
 
-BATCH_SIZE = 5000
+BATCH_SIZE = 200
 
 def infer_ml_batch(cfg_clipImage, cfg_logReg):
     WORKING_DIR = cfg_clipImage['clip_image']['working_directory']
@@ -59,6 +59,7 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
 
     # Start batching
     temp_result_folders = []
+    start_total = time()
    for batch in range(num_batchs):
         print(f"Processing batch {batch} / {num_batchs - 1}")
         start_time = time()
@@ -167,6 +168,14 @@ def infer_ml_batch(cfg_clipImage, cfg_logReg):
     shutil.rmtree(temp_storage)
     print("MERGING COMPLETED!")
 
+    # Print time for batch
+    time_elapsed = time() - start_total
+    n_hours = int(time_elapsed / 3600)
+    n_min = int((time_elapsed % 3600) / 60)
+    n_sec = int(time_elapsed - n_hours * 3600 - n_min * 60)
+    print("=" * 20 + "\n")
+    print(f'Total time : {n_hours}:{n_min}:{n_sec}\n')
+    print("=" * 20 + "\n")
 
 if __name__ == '__main__':
     # load input parameters