diff --git a/raw_data/preprocessing_scripts/import_building_data.py b/raw_data/preprocessing_scripts/import_building_data.py index 6cf69c1..1fa2a2f 100644 --- a/raw_data/preprocessing_scripts/import_building_data.py +++ b/raw_data/preprocessing_scripts/import_building_data.py @@ -2,98 +2,76 @@ import os import sys -import pandas as pd - from pylovo.GridGenerator import GridGenerator from pylovo.SyngridDatabaseConstructor import SyngridDatabaseConstructor -def create_list_of_shp_files(files_to_add, path_to_this_folder): - """ - making a list of dicts for the function scg.ogr_to_db() +def import_buildings_for_single_plz(gg): """ - ogr_ls_dict = [] - for file in files_to_add: - if "Oth" in file: - table_name = "oth" - elif "Res" in file: - table_name = "res" - else: - raise ValueError("shape file cannot be assigned to res or oth") - path = file - path = path.replace(path_to_this_folder, "./raw_data") # ".\\raw_data") # - ogr_ls_dict.append({"path": path, "table_name": table_name}) - if ogr_ls_dict: - return ogr_ls_dict - else: - raise Exception("Shapefiles of buildings for requested PLZ are not available.") - + Imports ags building data to the database for a given PLZ specified in the GridGenerator object. + AGS is added to ags_log table to avoid importing the same building data again. -def import_buildings_for_single_plz(gg: GridGenerator) -> None: # , plz_regiostar): - """imports building data to db for plz:\n - * PLZ is matched with AGS\n - * file name is generated\n - * buildings files are imported to database with SyngridDatabaseConstructor\n - * AGS is added to AGS as not to import same building data again - - :param gg: Grid generator object to get the plz and functions from - :type plz: string + :param gg: Grid generator object for querying relevant PLZ and AGS data """ - # get AGS for PLZ + # Retrieve AGS for the specified PLZ pg = gg.pgr ags_to_add = pg.get_municipal_register_for_plz(plz=gg.plz) - # check whether plz exists + # Check if the PLZ exists if ags_to_add.empty: - raise Exception("PLZ does not exist in register") - # get name and ags for the desired plz + raise Exception("PLZ does not exist in the municipal register.") + + # Extract name and AGS for the desired PLZ gg.logger.info(f"LV grids will be generated for {ags_to_add.iloc[0]['plz']} {ags_to_add.iloc[0]['name_city']}") - ags = ags_to_add.iloc[0]['ags'] - gg.logger.info(f'It´s AGS is:{ags}') + ags = ags_to_add.iloc[0]["ags"] + gg.logger.info(f"It's AGS is: {ags}") - # check in ags_log if ags is already on the database + # Check if AGS is already in the database (avoid duplication) df_log = pg.get_ags_log() - if ags in df_log['ags'].values: - gg.logger.info('Buildings of AGS are already on the pylovo database.') + if ags in df_log["ags"].values: + gg.logger.info("Buildings of this AGS are already in the pylovo database.") + return else: - gg.logger.info('Buildings are not yet on the database and will be added to pylovo database.') + gg.logger.info("Buildings for this AGS are not in the database and will be added.") - # absolute path to search all shape files inside a subfolders - path_to_this_folder = os.path.dirname(__file__) - data_path = os.path.join(path_to_this_folder, '**', '*.shp') - sys.path.append(data_path) + # Define the path for building shapefiles + data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "buildings")) + shapefiles_pattern = os.path.join(data_path, "*.shp") # Pattern for shapefiles - # retrieving all shape files - files_list = glob.glob(data_path, recursive=True) + # Retrieve all matching shapefiles + files_list = glob.glob(shapefiles_pattern, recursive=True) - # creating a list that only contains the files to add - files_to_add = [] - for file in files_list: - if str(ags) in file: - files_to_add.append(file) + # Filter files containing the specific AGS in their filenames + files_to_add = [file for file in files_list if str(ags) in file] - # making a list of dicts for the function scg.ogr_to_db() - ogr_ls_dict = create_list_of_shp_files(files_to_add, path_to_this_folder) + # Handle cases where no matching files are found + if not files_to_add: + raise FileNotFoundError(f"No shapefiles found for AGS {ags} in {data_path}") - # adding the buildings to the database - sgc = SyngridDatabaseConstructor(pgr=pg) - sgc.ogr_to_db(ogr_ls_dict) + # Create a list of dictionaries for ogr_to_db() + ogr_ls_dict = create_list_of_shp_files(files_to_add) + + # Add building data to the database + sgc = SyngridDatabaseConstructor(pgr=pg) + sgc.ogr_to_db(ogr_ls_dict) + + # Log the successfully added AGS to the log table in the database + pg.write_ags_log(ags) + + gg.logger.info(f"Buildings for AGS {ags} have been successfully added to the database.") - # adding the added ags to the log table - pg.write_ags_log(ags) def import_buildings_for_multiple_plz(sample_plz): """ imports building data to db for multiple plz """ - # absolute path to search all shape files inside a subfolders - path_to_this_folder = os.path.dirname(__file__) - data_path = os.path.join(path_to_this_folder, '**', '*.shp') - sys.path.append(data_path) + # Define the path for building shapefiles + data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "buildings")) + shapefiles_pattern = os.path.join(data_path, "*.shp") # Pattern for shapefiles # retrieving all shape files - files_list = glob.glob(data_path, recursive=True) + files_list = glob.glob(shapefiles_pattern, recursive=True) # get all AGS that need to be imported for the classification ags_to_add = sample_plz['ags'] @@ -118,7 +96,7 @@ def import_buildings_for_multiple_plz(sample_plz): if files_to_add: # define a list of required shapefiles to add to the database for the function scg.ogr_to_db() - ogr_ls_dict = create_list_of_shp_files(files_to_add, path_to_this_folder) + ogr_ls_dict = create_list_of_shp_files(files_to_add) # adding the buildings to the database sgc = SyngridDatabaseConstructor() @@ -126,4 +104,31 @@ def import_buildings_for_multiple_plz(sample_plz): # adding the added ags to the log file for ags in ags_to_add: - pg.write_ags_log(int(ags)) \ No newline at end of file + pg.write_ags_log(int(ags)) + +def create_list_of_shp_files(files_to_add): + """ + Creates a list of dictionaries required for the scg.ogr_to_db() function. + + :param files_to_add: List of shapefile paths to add. + :return: A list of dictionaries with keys "path" and "table_name". + """ + ogr_ls_dict = [] + + # Process each file path + for file_path in files_to_add: + # Determine table_name based on file naming convention + if "Oth" in file_path: + table_name = "oth" + elif "Res" in file_path: + table_name = "res" + else: + raise ValueError(f"Shapefile '{file_path}' cannot be assigned to 'res' or 'oth'.") + + ogr_ls_dict.append({"path": file_path, "table_name": table_name}) + + # Ensure the list is not empty + if ogr_ls_dict: + return ogr_ls_dict + else: + raise Exception("No valid shapefiles found for the requested PLZ.") \ No newline at end of file