Skip to content

Commit

Permalink
chore: added keep_all_tags parameter with a test
Browse files Browse the repository at this point in the history
  • Loading branch information
RaczeQ committed Jan 16, 2024
1 parent 027db6a commit e881efc
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 13 deletions.
24 changes: 18 additions & 6 deletions quackosm/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def convert_pbf_to_gpq(
tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None,
geometry_filter: Optional[BaseGeometry] = None,
result_file_path: Optional[Union[str, Path]] = None,
keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
Expand All @@ -47,10 +48,14 @@ def convert_pbf_to_gpq(
result_file_path (Union[str, Path], optional): Where to save
the geoparquet file. If not provided, will be generated based on hashes
from provided tags filter and geometry filter. Defaults to `None`.
keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
Whether to keep all tags related to the element, or return only those defined
in the `tags_filter`. When `True`, will override the optional grouping defined
in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
If `None`, will be set based on `tags_filter` parameter. If no tags filter is provided,
then `explode_tags` will set to `False`, if there is tags filter it will set to `True`.
Defaults to `None`.
If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
If there is tags filter defined and `keep_all_tags` is set to `False`, then it will
be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
Expand Down Expand Up @@ -220,6 +225,7 @@ def convert_pbf_to_gpq(
).convert_pbf_to_gpq(
pbf_path=pbf_path,
result_file_path=result_file_path,
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
filter_osm_ids=filter_osm_ids,
Expand All @@ -230,6 +236,7 @@ def get_features_gdf(
file_paths: Union[str, Path, Iterable[Union[str, Path]]],
tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None,
geometry_filter: Optional[BaseGeometry] = None,
keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
Expand Down Expand Up @@ -257,10 +264,14 @@ def get_features_gdf(
If `None`, handler will allow all of the tags to be parsed. Defaults to `None`.
geometry_filter (BaseGeometry, optional): Region which can be used to filter only
intersecting OSM objects. Defaults to `None`.
keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
Whether to keep all tags related to the element, or return only those defined
in the `tags_filter`. When `True`, will override the optional grouping defined
in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
If `None`, will be set based on `tags_filter` parameter. If no tags filter is provided,
then `explode_tags` will set to `False`, if there is tags filter it will set to `True`.
Defaults to `None`.
If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
If there is tags filter defined and `keep_all_tags` is set to `False`, then it will
be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache: (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
Expand Down Expand Up @@ -392,6 +403,7 @@ def get_features_gdf(
osm_way_polygon_features_config=osm_way_polygon_features_config,
).get_features_gdf(
file_paths=file_paths,
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
filter_osm_ids=filter_osm_ids,
Expand Down
22 changes: 16 additions & 6 deletions quackosm/pbf_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def __init__(
def get_features_gdf(
self,
file_paths: Union[str, Path, Iterable[Union[str, Path]]],
keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
Expand All @@ -155,10 +156,14 @@ def get_features_gdf(
Args:
file_paths (Union[str, Path, Iterable[Union[str, Path]]]):
Path or list of paths of `*.osm.pbf` files to be parsed.
keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
Whether to keep all tags related to the element, or return only those defined
in the `tags_filter`. When `True`, will override the optional grouping defined
in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
If `None`, will be set based on `tags_filter` parameter.
If no tags filter is provided, then `explode_tags` will set to `False`,
if there is tags filter it will set to `True`. Defaults to `None`.
If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
If there is tags filter defined and `keep_all_tags` is set to `False`, then it will
be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache: (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
Expand Down Expand Up @@ -203,6 +208,7 @@ def convert_pbf_to_gpq(
self,
pbf_path: Union[str, Path],
result_file_path: Optional[Union[str, Path]] = None,
keep_all_tags: bool = False,
explode_tags: Optional[bool] = None,
ignore_cache: bool = False,
filter_osm_ids: Optional[list[str]] = None,
Expand All @@ -215,10 +221,14 @@ def convert_pbf_to_gpq(
result_file_path (Union[str, Path], optional): Where to save
the geoparquet file. If not provided, will be generated based on hashes
from provided tags filter and geometry filter. Defaults to `None`.
keep_all_tags (bool, optional): Works only with the `tags_filter` parameter.
Whether to keep all tags related to the element, or return only those defined
in the `tags_filter`. When `True`, will override the optional grouping defined
in the `tags_filter`. Defaults to `False`.
explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys.
If `None`, will be set based on `tags_filter` parameter.
If no tags filter is provided, then `explode_tags` will set to `False`,
if there is tags filter it will set to `True`. Defaults to `None`.
If `None`, will be set based on `tags_filter` and `keep_all_tags` parameters.
If there is tags filter defined and `keep_all_tags` is set to `False`, then it will
be set to `True`. Otherwise it will be set to `False`. Defaults to `None`.
ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not.
Defaults to False.
filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file.
Expand Down
42 changes: 41 additions & 1 deletion tests/base/test_pbf_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS, HEX2VEC_FILTER

from quackosm._constants import FEATURES_INDEX
from quackosm._osm_tags_filters import OsmTagsFilter
from quackosm._osm_tags_filters import GroupedOsmTagsFilter, OsmTagsFilter
from quackosm.pbf_file_reader import PbfFileReader

ut = TestCase()
Expand Down Expand Up @@ -140,6 +140,46 @@ def test_pbf_reader_features_ids_filtering(filter_osm_ids: list[str], expected_r
assert len(features_gdf) == expected_result_length


@pytest.mark.parametrize(  # type: ignore
    "filter_osm_id,osm_tags_filter,keep_all_tags,expected_tags_keys",
    [
        # Without `keep_all_tags`, only the key named in the filter is expected.
        ("way/389888402", {"building": "apartments"}, False, ["building"]),
        # With `keep_all_tags`, every tag key listed for the feature is expected.
        (
            "way/389888402",
            {"building": "apartments"},
            True,
            [
                "addr:city",
                "addr:country",
                "addr:housenumber",
                "addr:postcode",
                "addr:street",
                "building",
                "building:levels",
            ],
        ),
    ],
)
def test_pbf_reader_proper_tags_reading(
    filter_osm_id: str,
    osm_tags_filter: Union[OsmTagsFilter, GroupedOsmTagsFilter],
    keep_all_tags: bool,
    expected_tags_keys: list[str],
):
    """Test which tag keys `PbfFileReader` returns for a single OSM feature.

    Reads the `monaco.osm.pbf` test file restricted to one feature id, with
    `explode_tags=False` so the tags stay in a single `tags` mapping, and checks
    that exactly one feature is returned and that the keys of its `tags` value
    equal `expected_tags_keys` (compared as lists, so order matters).

    Args:
        filter_osm_id (str): OSM feature id used to limit the result to one feature.
        osm_tags_filter (Union[OsmTagsFilter, GroupedOsmTagsFilter]): Tags filter
            passed to the `PbfFileReader` constructor.
        keep_all_tags (bool): Value forwarded to `get_features_gdf`.
        expected_tags_keys (list[str]): Tag keys expected on the returned feature.
    """
    pbf_path = Path(__file__).parent.parent / "test_files" / "monaco.osm.pbf"
    reader = PbfFileReader(tags_filter=osm_tags_filter)

    result_gdf = reader.get_features_gdf(
        file_paths=[pbf_path],
        keep_all_tags=keep_all_tags,
        explode_tags=False,
        # Bypass precalculated results so the file is parsed with these parameters.
        ignore_cache=True,
        filter_osm_ids=[filter_osm_id],
    )

    assert len(result_gdf) == 1

    only_feature = result_gdf.iloc[0]
    ut.assertListEqual(list(only_feature.tags.keys()), expected_tags_keys)


# Copyright (C) 2011 by Hong Minhee <http://dahlia.kr/>,
# Robert Kajic <http://github.com/kajic>
# Copyright (C) 2020 by Salesforce.com, Inc
Expand Down

0 comments on commit e881efc

Please sign in to comment.