diff --git a/quackosm/functions.py b/quackosm/functions.py
index a0278ad..fb662b7 100644
--- a/quackosm/functions.py
+++ b/quackosm/functions.py
@@ -21,6 +21,7 @@ def convert_pbf_to_gpq(
tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None,
geometry_filter: Optional[BaseGeometry] = None,
result_file_path: Optional[Union[str, Path]] = None,
+ keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
@@ -47,10 +48,14 @@ def convert_pbf_to_gpq(
result_file_path (Union[str, Path], optional): Where to save
the geoparquet file. If not provided, will be generated based on hashes
from provided tags filter and geometry filter. Defaults to `None`.
+ keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
+ Whether to keep all tags related to the element, or return only those defined
+ in the `tags_filter`. When `True`, will override the optional grouping defined
+ in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
- If `None`, will be set based on `tags_filter` parameter. If no tags filter is provided,
- then `explode_tags` will set to `False`, if there is tags filter it will set to `True`.
- Defaults to `None`.
+ If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
+ If a tags filter is defined and `keep_all_tags` is set to `False`, then it will
+ be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
@@ -220,6 +225,7 @@ def convert_pbf_to_gpq(
).convert_pbf_to_gpq(
pbf_path=pbf_path,
result_file_path=result_file_path,
+ keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
filter_osm_ids=filter_osm_ids,
@@ -230,6 +236,7 @@ def get_features_gdf(
file_paths: Union[str, Path, Iterable[Union[str, Path]]],
tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None,
geometry_filter: Optional[BaseGeometry] = None,
+ keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
@@ -257,10 +264,14 @@ def get_features_gdf(
If `None`, handler will allow all of the tags to be parsed. Defaults to `None`.
geometry_filter (BaseGeometry, optional): Region which can be used to filter only
intersecting OSM objects. Defaults to `None`.
+ keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
+ Whether to keep all tags related to the element, or return only those defined
+ in the `tags_filter`. When `True`, will override the optional grouping defined
+ in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
- If `None`, will be set based on `tags_filter` parameter. If no tags filter is provided,
- then `explode_tags` will set to `False`, if there is tags filter it will set to `True`.
- Defaults to `None`.
+ If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
+ If a tags filter is defined and `keep_all_tags` is set to `False`, then it will
+ be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache: (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
@@ -392,6 +403,7 @@ def get_features_gdf(
osm_way_polygon_features_config=osm_way_polygon_features_config,
).get_features_gdf(
file_paths=file_paths,
+ keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
filter_osm_ids=filter_osm_ids,
diff --git a/quackosm/pbf_file_reader.py b/quackosm/pbf_file_reader.py
index 986a533..a8d5edf 100644
--- a/quackosm/pbf_file_reader.py
+++ b/quackosm/pbf_file_reader.py
@@ -142,6 +142,7 @@ def __init__(
def get_features_gdf(
self,
file_paths: Union[str, Path, Iterable[Union[str, Path]]],
+ keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
@@ -155,10 +156,14 @@ def get_features_gdf(
Args:
file_paths (Union[str, Path, Iterable[Union[str, Path]]]):
Path or list of paths of `*.osm.pbf` files to be parsed.
+ keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
+ Whether to keep all tags related to the element, or return only those defined
+ in the `tags_filter`. When `True`, will override the optional grouping defined
+ in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
- If `None`, will be set based on `tags_filter` parameter.
- If no tags filter is provided, then `explode_tags` will set to `False`,
- if there is tags filter it will set to `True`. Defaults to `None`.
+ If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
+ If a tags filter is defined and `keep_all_tags` is set to `False`, then it will
+ be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache: (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
@@ -203,6 +208,7 @@ def convert_pbf_to_gpq(
self,
pbf_path: Union[str, Path],
result_file_path: Optional[Union[str, Path]] = None,
+ keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
@@ -215,10 +221,14 @@ def convert_pbf_to_gpq(
result_file_path (Union[str, Path], optional): Where to save
the geoparquet file. If not provided, will be generated based on hashes
from provided tags filter and geometry filter. Defaults to `None`.
+ keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
+ Whether to keep all tags related to the element, or return only those defined
+ in the `tags_filter`. When `True`, will override the optional grouping defined
+ in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
- If `None`, will be set based on `tags_filter` parameter.
- If no tags filter is provided, then `explode_tags` will set to `False`,
- if there is tags filter it will set to `True`. Defaults to `None`.
+ If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
+ If a tags filter is defined and `keep_all_tags` is set to `False`, then it will
+ be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
diff --git a/tests/base/test_pbf_file_reader.py b/tests/base/test_pbf_file_reader.py
index 7e2682a..b1a1739 100644
--- a/tests/base/test_pbf_file_reader.py
+++ b/tests/base/test_pbf_file_reader.py
@@ -25,7 +25,7 @@
from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS, HEX2VEC_FILTER
from quackosm._constants import FEATURES_INDEX
-from quackosm._osm_tags_filters import OsmTagsFilter
+from quackosm._osm_tags_filters import GroupedOsmTagsFilter, OsmTagsFilter
from quackosm.pbf_file_reader import PbfFileReader
ut = TestCase()
@@ -140,6 +140,46 @@ def test_pbf_reader_features_ids_filtering(filter_osm_ids: list[str], expected_r
assert len(features_gdf) == expected_result_length
+@pytest.mark.parametrize( # type: ignore
+ "filter_osm_id,osm_tags_filter,keep_all_tags,expected_tags_keys",
+ [
+ ("way/389888402", {"building": "apartments"}, False, ["building"]),
+ (
+ "way/389888402",
+ {"building": "apartments"},
+ True,
+ [
+ "addr:city",
+ "addr:country",
+ "addr:housenumber",
+ "addr:postcode",
+ "addr:street",
+ "building",
+ "building:levels",
+ ],
+ ),
+ ],
+)
+def test_pbf_reader_proper_tags_reading(
+ filter_osm_id: str,
+ osm_tags_filter: Union[OsmTagsFilter, GroupedOsmTagsFilter],
+ keep_all_tags: bool,
+ expected_tags_keys: list[str],
+):
+ """Test proper tags reading in `PbfFileReader`."""
+ file_name = "monaco.osm.pbf"
+ features_gdf = PbfFileReader(tags_filter=osm_tags_filter).get_features_gdf(
+ file_paths=[Path(__file__).parent.parent / "test_files" / file_name],
+ ignore_cache=True,
+ filter_osm_ids=[filter_osm_id],
+ explode_tags=False,
+ keep_all_tags=keep_all_tags,
+ )
+ assert len(features_gdf) == 1
+ returned_tags_keys = list(features_gdf.iloc[0].tags.keys())
+ ut.assertListEqual(returned_tags_keys, expected_tags_keys)
+
+
# Copyright (C) 2011 by Hong Minhee ,
# Robert Kajic
# Copyright (C) 2020 by Salesforce.com, Inc