diff --git a/quackosm/functions.py b/quackosm/functions.py index a0278ad..fb662b7 100644 --- a/quackosm/functions.py +++ b/quackosm/functions.py @@ -21,6 +21,7 @@ def convert_pbf_to_gpq( tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None, geometry_filter: Optional[BaseGeometry] = None, result_file_path: Optional[Union[str, Path]] = None, + keep_all_tags: bool = False, explode_tags: Optional[bool] = None, ignore_cache: bool = False, filter_osm_ids: Optional[list[str]] = None, @@ -47,10 +48,14 @@ def convert_pbf_to_gpq( result_file_path (Union[str, Path], optional): Where to save the geoparquet file. If not provided, will be generated based on hashes from provided tags filter and geometry filter. Defaults to `None`. + keep_all_tags (bool, optional): Works only with the `tags_filter` parameter. + Whether to keep all tags related to the element, or return only those defined + in the `tags_filter`. When `True`, will override the optional grouping defined + in the `tags_filter`. Defaults to `False`. explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. - If `None`, will be set based on `tags_filter` parameter. If no tags filter is provided, - then `explode_tags` will set to `False`, if there is tags filter it will set to `True`. - Defaults to `None`. + If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters. + If there is tags filter defined and `keep_all_tags` is set to `False`, then it will + be set to `True`. Otherwise it will be set to `False`. Defaults to `None`. ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not. Defaults to False. filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file. 
@@ -220,6 +225,7 @@ def convert_pbf_to_gpq( ).convert_pbf_to_gpq( pbf_path=pbf_path, result_file_path=result_file_path, + keep_all_tags=keep_all_tags, explode_tags=explode_tags, ignore_cache=ignore_cache, filter_osm_ids=filter_osm_ids, @@ -230,6 +236,7 @@ def get_features_gdf( file_paths: Union[str, Path, Iterable[Union[str, Path]]], tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None, geometry_filter: Optional[BaseGeometry] = None, + keep_all_tags: bool = False, explode_tags: Optional[bool] = None, ignore_cache: bool = False, filter_osm_ids: Optional[list[str]] = None, @@ -257,10 +264,14 @@ def get_features_gdf( If `None`, handler will allow all of the tags to be parsed. Defaults to `None`. geometry_filter (BaseGeometry, optional): Region which can be used to filter only intersecting OSM objects. Defaults to `None`. + keep_all_tags (bool, optional): Works only with the `tags_filter` parameter. + Whether to keep all tags related to the element, or return only those defined + in the `tags_filter`. When `True`, will override the optional grouping defined + in the `tags_filter`. Defaults to `False`. explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. - If `None`, will be set based on `tags_filter` parameter. If no tags filter is provided, - then `explode_tags` will set to `False`, if there is tags filter it will set to `True`. - Defaults to `None`. + If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters. + If there is tags filter defined and `keep_all_tags` is set to `False`, then it will + be set to `True`. Otherwise it will be set to `False`. Defaults to `None`. ignore_cache: (bool, optional): Whether to ignore precalculated geoparquet files or not. Defaults to False. filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file. 
@@ -392,6 +403,7 @@ def get_features_gdf( osm_way_polygon_features_config=osm_way_polygon_features_config, ).get_features_gdf( file_paths=file_paths, + keep_all_tags=keep_all_tags, explode_tags=explode_tags, ignore_cache=ignore_cache, filter_osm_ids=filter_osm_ids, diff --git a/quackosm/pbf_file_reader.py b/quackosm/pbf_file_reader.py index 986a533..a8d5edf 100644 --- a/quackosm/pbf_file_reader.py +++ b/quackosm/pbf_file_reader.py @@ -142,6 +142,7 @@ def __init__( def get_features_gdf( self, file_paths: Union[str, Path, Iterable[Union[str, Path]]], + keep_all_tags: bool = False, explode_tags: Optional[bool] = None, ignore_cache: bool = False, filter_osm_ids: Optional[list[str]] = None, @@ -155,10 +156,14 @@ def get_features_gdf( Args: file_paths (Union[str, Path, Iterable[Union[str, Path]]]): Path or list of paths of `*.osm.pbf` files to be parsed. + keep_all_tags (bool, optional): Works only with the `tags_filter` parameter. + Whether to keep all tags related to the element, or return only those defined + in the `tags_filter`. When `True`, will override the optional grouping defined + in the `tags_filter`. Defaults to `False`. explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. - If `None`, will be set based on `tags_filter` parameter. - If no tags filter is provided, then `explode_tags` will set to `False`, - if there is tags filter it will set to `True`. Defaults to `None`. + If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters. + If there is tags filter defined and `keep_all_tags` is set to `False`, then it will + be set to `True`. Otherwise it will be set to `False`. Defaults to `None`. ignore_cache: (bool, optional): Whether to ignore precalculated geoparquet files or not. Defaults to False. filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file. 
@@ -203,6 +208,7 @@ def convert_pbf_to_gpq( self, pbf_path: Union[str, Path], result_file_path: Optional[Union[str, Path]] = None, + keep_all_tags: bool = False, explode_tags: Optional[bool] = None, ignore_cache: bool = False, filter_osm_ids: Optional[list[str]] = None, @@ -215,10 +221,14 @@ def convert_pbf_to_gpq( result_file_path (Union[str, Path], optional): Where to save the geoparquet file. If not provided, will be generated based on hashes from provided tags filter and geometry filter. Defaults to `None`. + keep_all_tags (bool, optional): Works only with the `tags_filter` parameter. + Whether to keep all tags related to the element, or return only those defined + in the `tags_filter`. When `True`, will override the optional grouping defined + in the `tags_filter`. Defaults to `False`. explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. - If `None`, will be set based on `tags_filter` parameter. - If no tags filter is provided, then `explode_tags` will set to `False`, - if there is tags filter it will set to `True`. Defaults to `None`. + If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters. + If there is tags filter defined and `keep_all_tags` is set to `False`, then it will + be set to `True`. Otherwise it will be set to `False`. Defaults to `None`. ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not. Defaults to False. filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file. 
diff --git a/tests/base/test_pbf_file_reader.py b/tests/base/test_pbf_file_reader.py index 7e2682a..b1a1739 100644 --- a/tests/base/test_pbf_file_reader.py +++ b/tests/base/test_pbf_file_reader.py @@ -25,7 +25,7 @@ from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS, HEX2VEC_FILTER from quackosm._constants import FEATURES_INDEX -from quackosm._osm_tags_filters import OsmTagsFilter +from quackosm._osm_tags_filters import GroupedOsmTagsFilter, OsmTagsFilter from quackosm.pbf_file_reader import PbfFileReader ut = TestCase() @@ -140,6 +140,46 @@ def test_pbf_reader_features_ids_filtering(filter_osm_ids: list[str], expected_r assert len(features_gdf) == expected_result_length +@pytest.mark.parametrize( # type: ignore + "filter_osm_id,osm_tags_filter,keep_all_tags,expected_tags_keys", + [ + ("way/389888402", {"building": "apartments"}, False, ["building"]), + ( + "way/389888402", + {"building": "apartments"}, + True, + [ + "addr:city", + "addr:country", + "addr:housenumber", + "addr:postcode", + "addr:street", + "building", + "building:levels", + ], + ), + ], +) +def test_pbf_reader_proper_tags_reading( + filter_osm_id: str, + osm_tags_filter: Union[OsmTagsFilter, GroupedOsmTagsFilter], + keep_all_tags: bool, + expected_tags_keys: list[str], +): + """Test proper tags reading in `PbfFileReader` with and without `keep_all_tags`.""" + file_name = "monaco.osm.pbf" + features_gdf = PbfFileReader(tags_filter=osm_tags_filter).get_features_gdf( + file_paths=[Path(__file__).parent.parent / "test_files" / file_name], + ignore_cache=True, + filter_osm_ids=[filter_osm_id], + explode_tags=False, + keep_all_tags=keep_all_tags, + ) + assert len(features_gdf) == 1 + returned_tags_keys = list(features_gdf.iloc[0].tags.keys()) + ut.assertListEqual(returned_tags_keys, expected_tags_keys) + + # Copyright (C) 2011 by Hong Minhee , # Robert Kajic # Copyright (C) 2020 by Salesforce.com, Inc