Skip to content

Commit

Permalink
feat: change minimal duckdb version (#153)
Browse files Browse the repository at this point in the history
* feat: bump duckdb version

* chore: change readme

* chore: change string to constant

* ci: removed conftest.py from build process

* chore: replace unary_union calls and silence warnings

* feat: refactor duckdb parquet operations

* chore: remove loading parquet extension

* fix: change geometry reading for empty relations

* chore: change lock hash

* chore: add explicit geometry casting

* chore: simplify tags loading with new duckdb engine
  • Loading branch information
RaczeQ authored Sep 24, 2024
1 parent fea4cfd commit 86c5884
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 121 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Bumped minimum required DuckDB version to `1.1.0`
- Refactored GeoParquet operations for compatibility with the new DuckDB version
- Excluded `conftest.py` file from the final library build
- Replaced `unary_union` calls with `union_all()` on all GeoDataFrames
- Silenced `pooch` library warnings about an empty SHA hash

## [0.10.0] - 2024-09-23

### Changed
Expand Down
14 changes: 4 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ QuackOSM supports **Python >= 3.9**

Required:

- `duckdb (>=0.10.2, <1.1.0)`: For all DuckDB operations on PBF files
- `duckdb (>=1.1.0)`: For all DuckDB operations on PBF files

- `pyarrow (>=16.0.0)`: For wrangling Parquet files

Expand Down Expand Up @@ -148,9 +148,7 @@ way/993121275 {'building': 'yes', 'name': ... POLYGON ((7.43214 43.7481...
```python
>>> import duckdb
>>> duckdb.load_extension('spatial')
>>> duckdb.read_parquet(str(gpq_path)).project(
... "* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
... ).order("feature_id")
>>> duckdb.read_parquet(str(gpq_path)).order("feature_id")
┌──────────────────┬──────────────────────┬──────────────────────────────────────────────┐
│ feature_id │ tags │ geometry │
│ varchar │ map(varchar, varch… │ geometry │
Expand Down Expand Up @@ -270,9 +268,7 @@ way/998561139 {'barrier': 'bollard', 'bicyc... LINESTRING (12.45828 41.9...
```python
>>> import duckdb
>>> duckdb.load_extension('spatial')
>>> duckdb.read_parquet(str(gpq_path)).project(
... "* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
... ).order("feature_id")
>>> duckdb.read_parquet(str(gpq_path)).order("feature_id")
┌──────────────────┬──────────────────────┬──────────────────────────────────────────────┐
│ feature_id │ tags │ geometry │
│ varchar │ map(varchar, varch… │ geometry │
Expand Down Expand Up @@ -388,9 +384,7 @@ relation/3256168 {'building': 'yes', 'type': ... POLYGON ((12.46061 41.907...
```python
>>> import duckdb
>>> duckdb.load_extension('spatial')
>>> duckdb.read_parquet(str(gpq_path)).project(
... "* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
... ).order("feature_id")
>>> duckdb.read_parquet(str(gpq_path)).order("feature_id")
┌──────────────────┬────────────────────────────┬──────────────────────────────┐
│ feature_id │ tags │ geometry │
│ varchar │ map(varchar, varchar) │ geometry │
Expand Down
70 changes: 35 additions & 35 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies = [
"geopandas>=0.6",
"shapely>=2",
"pyarrow>=16.0.0",
"duckdb>=0.10.2,<1.1.0",
"duckdb>=1.1.0",
"geoarrow-pyarrow>=0.1.2",
"geoarrow-pandas>=0.1.1",
"typeguard>=3.0.0",
Expand Down Expand Up @@ -109,6 +109,9 @@ cli-dev = ["ipywidgets", "folium", "matplotlib>=3.2.0", "mapclassify"]
[tool.pdm.scripts]
post_install = "pre-commit install"

[tool.pdm.build]
excludes = ["quackosm/conftest.py"]

[tool.black]
line-length = 100
target-version = ["py39", "py310", "py311", "py312"]
Expand Down
8 changes: 4 additions & 4 deletions quackosm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
import geopandas as gpd

gdf = gpd.read_file(value)
return gdf.unary_union
return gdf.union_all()
except Exception:
raise typer.BadParameter("Cannot parse provided geo file") from None

Expand Down Expand Up @@ -140,7 +140,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
geometries.append(
box(minx=bounds["w"], miny=bounds["s"], maxx=bounds["e"], maxy=bounds["n"])
)
return gpd.GeoSeries(geometries).unary_union
return gpd.GeoSeries(geometries).union_all()
except Exception:
raise typer.BadParameter(f"Cannot parse provided Geohash value: {geohash}") from None

Expand All @@ -165,7 +165,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
geometries.append(
Polygon([coords[::-1] for coords in h3.cell_to_boundary(h3_cell.strip())])
)
return gpd.GeoSeries(geometries).unary_union
return gpd.GeoSeries(geometries).union_all()
except Exception as ex:
raise typer.BadParameter(f"Cannot parse provided H3 values: {value}") from ex

Expand All @@ -190,7 +190,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
geometries.append(
Polygon(s2.s2_to_geo_boundary(s2_index.strip(), geo_json_conformant=True))
)
return gpd.GeoSeries(geometries).unary_union
return gpd.GeoSeries(geometries).union_all()
except Exception:
raise typer.BadParameter(f"Cannot parse provided S2 value: {s2_index}") from None

Expand Down
6 changes: 5 additions & 1 deletion quackosm/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import duckdb
import pandas
import pytest
from pooch import get_logger as get_pooch_logger
from pooch import retrieve

from quackosm.osm_extracts.extract import OsmExtractSource
Expand Down Expand Up @@ -53,13 +54,15 @@ def add_pbf_files(doctest_namespace): # type: ignore
shutil.copy(pbf_file_path, geofabrik_pbf_file_path)



@pytest.fixture(autouse=True, scope="session")
def download_osm_extracts_indexes(): # type: ignore
"""Download OSM extract indexes files to cache."""
download_directory = Path("cache")
download_directory.mkdir(parents=True, exist_ok=True)

logger = get_pooch_logger()
logger.setLevel("WARNING")

for osm_extract in OsmExtractSource:
if osm_extract == OsmExtractSource.any:
continue
Expand All @@ -81,6 +84,7 @@ def install_spatial_extension(): # type: ignore
"""Install duckdb spatial extension."""
duckdb.install_extension("spatial")


@pytest.fixture(autouse=True, scope="session") # type: ignore
def pandas_terminal_width() -> None:
"""Change pandas dataframe display options."""
Expand Down
Loading

0 comments on commit 86c5884

Please sign in to comment.