Skip to content

Commit

Permalink
Fix buildings import with new directory structure
Browse files Browse the repository at this point in the history
  • Loading branch information
bereba committed Jan 16, 2025
1 parent 3a93916 commit c72d4ad
Showing 1 changed file with 70 additions and 65 deletions.
135 changes: 70 additions & 65 deletions raw_data/preprocessing_scripts/import_building_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,98 +2,76 @@
import os
import sys

import pandas as pd

from pylovo.GridGenerator import GridGenerator
from pylovo.SyngridDatabaseConstructor import SyngridDatabaseConstructor


def create_list_of_shp_files(files_to_add, path_to_this_folder):
"""
making a list of dicts for the function scg.ogr_to_db()
def import_buildings_for_single_plz(gg):
"""
ogr_ls_dict = []
for file in files_to_add:
if "Oth" in file:
table_name = "oth"
elif "Res" in file:
table_name = "res"
else:
raise ValueError("shape file cannot be assigned to res or oth")
path = file
path = path.replace(path_to_this_folder, "./raw_data") # ".\\raw_data") #
ogr_ls_dict.append({"path": path, "table_name": table_name})
if ogr_ls_dict:
return ogr_ls_dict
else:
raise Exception("Shapefiles of buildings for requested PLZ are not available.")

Imports ags building data to the database for a given PLZ specified in the GridGenerator object.
AGS is added to ags_log table to avoid importing the same building data again.
def import_buildings_for_single_plz(gg: GridGenerator) -> None: # , plz_regiostar):
"""imports building data to db for plz:\n
* PLZ is matched with AGS\n
* file name is generated\n
* buildings files are imported to database with SyngridDatabaseConstructor\n
* AGS is added to AGS as not to import same building data again
:param gg: Grid generator object to get the plz and functions from
:type plz: string
:param gg: Grid generator object for querying relevant PLZ and AGS data
"""
# get AGS for PLZ
# Retrieve AGS for the specified PLZ
pg = gg.pgr
ags_to_add = pg.get_municipal_register_for_plz(plz=gg.plz)

# check whether plz exists
# Check if the PLZ exists
if ags_to_add.empty:
raise Exception("PLZ does not exist in register")
# get name and ags for the desired plz
raise Exception("PLZ does not exist in the municipal register.")

# Extract name and AGS for the desired PLZ
gg.logger.info(f"LV grids will be generated for {ags_to_add.iloc[0]['plz']} {ags_to_add.iloc[0]['name_city']}")
ags = ags_to_add.iloc[0]['ags']
gg.logger.info(f'It´s AGS is:{ags}')
ags = ags_to_add.iloc[0]["ags"]
gg.logger.info(f"It's AGS is: {ags}")

# check in ags_log if ags is already on the database
# Check if AGS is already in the database (avoid duplication)
df_log = pg.get_ags_log()
if ags in df_log['ags'].values:
gg.logger.info('Buildings of AGS are already on the pylovo database.')
if ags in df_log["ags"].values:
gg.logger.info("Buildings of this AGS are already in the pylovo database.")
return
else:
gg.logger.info('Buildings are not yet on the database and will be added to pylovo database.')
gg.logger.info("Buildings for this AGS are not in the database and will be added.")

# absolute path to search all shape files inside a subfolders
path_to_this_folder = os.path.dirname(__file__)
data_path = os.path.join(path_to_this_folder, '**', '*.shp')
sys.path.append(data_path)
# Define the path for building shapefiles
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "buildings"))
shapefiles_pattern = os.path.join(data_path, "*.shp") # Pattern for shapefiles

# retrieving all shape files
files_list = glob.glob(data_path, recursive=True)
# Retrieve all matching shapefiles
files_list = glob.glob(shapefiles_pattern, recursive=True)

# creating a list that only contains the files to add
files_to_add = []
for file in files_list:
if str(ags) in file:
files_to_add.append(file)
# Filter files containing the specific AGS in their filenames
files_to_add = [file for file in files_list if str(ags) in file]

# making a list of dicts for the function scg.ogr_to_db()
ogr_ls_dict = create_list_of_shp_files(files_to_add, path_to_this_folder)
# Handle cases where no matching files are found
if not files_to_add:
raise FileNotFoundError(f"No shapefiles found for AGS {ags} in {data_path}")

# adding the buildings to the database
sgc = SyngridDatabaseConstructor(pgr=pg)
sgc.ogr_to_db(ogr_ls_dict)
# Create a list of dictionaries for ogr_to_db()
ogr_ls_dict = create_list_of_shp_files(files_to_add)

# Add building data to the database
sgc = SyngridDatabaseConstructor(pgr=pg)
sgc.ogr_to_db(ogr_ls_dict)

# Log the successfully added AGS to the log table in the database
pg.write_ags_log(ags)

gg.logger.info(f"Buildings for AGS {ags} have been successfully added to the database.")

# adding the added ags to the log table
pg.write_ags_log(ags)


def import_buildings_for_multiple_plz(sample_plz):
"""
imports building data to db for multiple plz
"""
# absolute path to search all shape files inside a subfolders
path_to_this_folder = os.path.dirname(__file__)
data_path = os.path.join(path_to_this_folder, '**', '*.shp')
sys.path.append(data_path)
# Define the path for building shapefiles
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "buildings"))
shapefiles_pattern = os.path.join(data_path, "*.shp") # Pattern for shapefiles

# retrieving all shape files
files_list = glob.glob(data_path, recursive=True)
files_list = glob.glob(shapefiles_pattern, recursive=True)

# get all AGS that need to be imported for the classification
ags_to_add = sample_plz['ags']
Expand All @@ -118,12 +96,39 @@ def import_buildings_for_multiple_plz(sample_plz):

if files_to_add:
# define a list of required shapefiles to add to the database for the function scg.ogr_to_db()
ogr_ls_dict = create_list_of_shp_files(files_to_add, path_to_this_folder)
ogr_ls_dict = create_list_of_shp_files(files_to_add)

# adding the buildings to the database
sgc = SyngridDatabaseConstructor()
sgc.ogr_to_db(ogr_ls_dict)

# adding the added ags to the log file
for ags in ags_to_add:
pg.write_ags_log(int(ags))
pg.write_ags_log(int(ags))

def create_list_of_shp_files(files_to_add):
    """
    Creates a list of dictionaries required for the scg.ogr_to_db() function.

    The target table is derived from the file name only, not from the full
    path. The callers pass absolute paths, so a parent directory whose name
    happens to contain "Oth" or "Res" must not influence the assignment.

    :param files_to_add: List of shapefile paths to add.
    :return: A list of dictionaries with keys "path" and "table_name".
    :raises ValueError: If a file name contains neither "Oth" nor "Res".
    :raises Exception: If no shapefile was passed in (empty input).
    """
    ogr_ls_dict = []

    for file_path in files_to_add:
        # Classify on the base name so directory components are ignored.
        file_name = os.path.basename(file_path)

        # "Oth" is checked first, as before, so it wins if both markers occur.
        if "Oth" in file_name:
            table_name = "oth"
        elif "Res" in file_name:
            table_name = "res"
        else:
            raise ValueError(f"Shapefile '{file_path}' cannot be assigned to 'res' or 'oth'.")

        # The full, unmodified path is what gets handed on to ogr_to_db().
        ogr_ls_dict.append({"path": file_path, "table_name": table_name})

    # An empty result can only come from empty input; report it explicitly.
    if not ogr_ls_dict:
        raise Exception("No valid shapefiles found for the requested PLZ.")

    return ogr_ls_dict

0 comments on commit c72d4ad

Please sign in to comment.