diff --git a/CHANGELOG.md b/CHANGELOG.md index db868cee..365f11c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 1.5.12 (2025-01-29) + +### Refactor + +- Remove unused stats collection, fix stats templates +- Multiple custom visualizations for custom/HDX exports + ## 1.5.11 (2025-01-28) ### Fix diff --git a/pyproject.toml b/pyproject.toml index 3c150f7f..5b41c80f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "raw-data-api" -version = "1.5.11" +version = "1.5.12" description = "Set of high-performant APIs for transforming and exporting OpenStreetMap (OSM) data in different GIS file formats." readme = "README.md" authors = [ diff --git a/requirements.txt b/requirements.txt index e1930f50..eb03b4ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ humanize==4.9.0 python-slugify==8.0.1 geomet==1.1.0 PyYAML==6.0.1 -geojson-stats==0.2.5 +geojson-stats==0.2.6 transliterate==1.10.2 ## documentation diff --git a/src/app.py b/src/app.py index 1b313d8c..d1f3c433 100644 --- a/src/app.py +++ b/src/app.py @@ -1299,6 +1299,7 @@ def __init__(self, params, uid=None): self.default_export_base_name = ( self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix ) + self.default_export_path = os.path.join( export_path, self.uuid, @@ -1307,6 +1308,7 @@ def __init__(self, params, uid=None): ) if os.path.exists(self.default_export_path): shutil.rmtree(self.default_export_path, ignore_errors=True) + os.makedirs(self.default_export_path) if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: @@ -1941,9 +1943,22 @@ def add_resource(self, resource_meta): # Add customviz if available if resource_meta.get("stats_html"): - self.dataset.update( - {"customviz": [{"url": resource_meta["stats_html"]}]} - ) + dataset_customviz = self.dataset.get("customviz") + if not dataset_customviz: + dataset_customviz = [ + { + "name": resource_meta["name"], + "url": resource_meta["stats_html"], + } 
+ ] + else: + dataset_customviz.append( + { + "name": resource_meta["name"], + "url": resource_meta["stats_html"], + } + ) + self.dataset.update({"customviz": dataset_customviz}) def upload_dataset(self, dump_config_to_s3=False): """ diff --git a/src/post_processing/geojson_stats.py b/src/post_processing/geojson_stats.py index 9ca74d68..6a5e612e 100644 --- a/src/post_processing/geojson_stats.py +++ b/src/post_processing/geojson_stats.py @@ -1,12 +1,9 @@ from geojson_stats.stats import Stats from geojson_stats.html import Html -CONFIG_AREA = ["building"] -CONFIG_LENGTH = ["highway", "waterway"] - class GeoJSONStats(Stats): - """Used for collecting stats while processing GeoJSON files line by line""" + """Used for collecting stats while processing GeoJSON files""" def __init__(self, filters, *args, **kwargs): super().__init__(*args, **kwargs) @@ -14,48 +11,14 @@ def __init__(self, filters, *args, **kwargs): self.config.clean = True self.config.properties_prop = "properties.tags" - if filters and filters.tags: - for tag in CONFIG_AREA: - if self.check_filter(filters.tags, tag): - self.config.keys.append(tag) - self.config.value_keys.append(tag) - self.config.area = True - - for tag in CONFIG_LENGTH: - if self.check_filter(filters.tags, tag): - self.config.keys.append(tag) - self.config.value_keys.append(tag) - self.config.length = True - - def check_filter(self, tags, tag): - """ - Check if a tag is present in tag filters - """ - - if tags.all_geometry: - if tags.all_geometry.join_or and tag in tags.all_geometry.join_or: - return True - if tags.all_geometry.join_and and tag in tags.all_geometry.join_and: - return True - if tags.polygon: - if tags.polygon.join_or and tag in tags.polygon.join_or: - return True - if tags.polygon.join_and and tag in tags.polygon.join_and: - return True - if tags.line: - if tags.line.join_or and tag in tags.line.join_or: - return True - if tags.line.join_and and tag in tags.line.join_and: - return True - def raw_data_line_stats(self, 
json_object: dict): """ Process a GeoJSON line (for getting stats) and return that line """ self.get_object_stats(json_object) - def html(self, tpl): + def html(self, tpl, tpl_params): """ Returns stats Html object, generated from stats data using a template """ - return Html(tpl, self) + return Html(tpl, self, tpl_params) diff --git a/src/post_processing/processor.py b/src/post_processing/processor.py index c7b416f2..ce07dc15 100644 --- a/src/post_processing/processor.py +++ b/src/post_processing/processor.py @@ -4,9 +4,17 @@ import os import pathlib +CATEGORIES_CONFIG = { + "roads": {"tag": "highway", "length": True, "area": False}, + "buildings": {"tag": "building", "length": False, "area": True}, + "waterways": {"tag": "waterway", "length": True, "area": False}, + "railways": {"tag": "railway", "length": True, "area": False}, + "default": {"tag": None, "length": False, "area": False}, +} + class PostProcessor: - """Used for posst-process data while processing GeoJSON files line by line""" + """Used for post-processing GeoJSON files""" options = {} filters = {} @@ -27,6 +35,13 @@ def post_process_line(self, line: str): return json.dumps(line_object) + def get_categories_config(self, category_name): + """ + Get configuration for categories + """ + config = CATEGORIES_CONFIG.get(category_name) + return config if config else CATEGORIES_CONFIG["default"] + def custom( self, category_name, export_format_path, export_filename, file_export_path ): @@ -35,25 +50,12 @@ def custom( """ self.geoJSONStats.config.properties_prop = "properties" - category_tag = "" - if category_name == "roads": - category_tag = "highway" - self.geoJSONStats.config.length = True - elif category_name == "buildings": - category_tag = "building" - self.geoJSONStats.config.area = True - elif category_name == "waterways": - category_tag = "waterway" - self.geoJSONStats.config.length = True - elif category_name == "railways": - category_tag = "railway" - self.geoJSONStats.config.length = True +
category_config = self.get_categories_config(category_name) + category_tag = category_config["tag"] + self.geoJSONStats.config.length = category_config["length"] + self.geoJSONStats.config.area = category_config["area"] if self.options["include_stats"]: - if category_tag: - self.geoJSONStats.config.keys.append(category_tag) - self.geoJSONStats.config.value_keys.append(category_tag) - path_input = os.path.join(export_format_path, f"{export_filename}.geojson") path_output = os.path.join( export_format_path, f"{export_filename}-post.geojson" @@ -102,7 +104,9 @@ def custom( project_root, "{tpl}_tpl.html".format(tpl=tpl), ) - geojson_stats_html = self.geoJSONStats.html(tpl_path).build() + geojson_stats_html = self.geoJSONStats.html( + tpl_path, {"title": f"{export_filename}.geojson"} + ).build() upload_html_path = os.path.join(file_export_path, "stats-summary.html") with open(upload_html_path, "w") as f: f.write(geojson_stats_html) diff --git a/src/post_processing/stats_building_tpl.html b/src/post_processing/stats_building_tpl.html index 97c3f659..d63415b9 100644 --- a/src/post_processing/stats_building_tpl.html +++ b/src/post_processing/stats_building_tpl.html @@ -3,31 +3,31 @@ - HOT Export Stats