diff --git a/.github/workflows/_tests.yml b/.github/workflows/_tests.yml new file mode 100644 index 0000000..ad5cf5b --- /dev/null +++ b/.github/workflows/_tests.yml @@ -0,0 +1,80 @@ +name: tests +on: workflow_call + +jobs: + run-tests: + name: Run tests πŸ› οΈ on multiple systems πŸ–₯️ and Python 🐍 versions + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.9", "3.10", "3.11"] + include: + - os: macos-latest + python-version: "3.11" + - os: windows-latest + python-version: "3.11" + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + steps: + - uses: actions/checkout@v3 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install GDAL (unix) + if: matrix.os != 'windows-latest' + run: | + $CONDA/bin/conda install -c conda-forge gdal + $CONDA/bin/ogr2ogr --version + - name: Install GDAL (windows) + if: matrix.os == 'windows-latest' + run: | + & $env:CONDA\Scripts\conda.exe install -c conda-forge gdal + & $env:CONDA\Library\bin\ogr2ogr.exe --version + - uses: pdm-project/setup-pdm@v3 + name: Setup PDM (Python ${{ matrix.python-version }}) + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + enable-pep582: true + cache: true + cache-dependency-path: "**/pdm.lock" + - name: Install dependencies + run: pdm install -d -G test --skip=post_install + - name: Cache Overpass data + uses: actions/cache@v3 + with: + path: cache + key: overpass-cache-${{ matrix.os }}-${{ matrix.python-version }} + - name: Cache tox runner + uses: actions/cache@v3 + with: + path: .tox + key: tox-cache-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/pdm.lock') }} + restore-keys: | + tox-cache-${{ matrix.os }}-${{ matrix.python-version }}- + - name: Run tests with tox (unix) + if: matrix.os != 'windows-latest' + run: | + PATH=$CONDA/bin:$PATH + pdm run tox -e python${{ matrix.python-version }} + - name: Run tests with tox (windows) + if: matrix.os == 'windows-latest' + run: | + $env:Path = "$env:CONDA\Library\bin;" + $env:Path + pdm run tox -e python${{ matrix.python-version }} + - name: Upload coverage to Codecov + uses: Wandalen/wretry.action@master + with: + action: codecov/codecov-action@v3 + with: | + env_vars: OS,PYTHON + fail_ci_if_error: true + files: ./coverage.python${{ matrix.python-version }}.xml, + flags: ${{ matrix.os }}-python${{ matrix.python-version }} + verbose: true + attempt_limit: 100 + attempt_delay: 10000 diff --git a/.github/workflows/bump-and-pr.yml b/.github/workflows/bump-and-pr.yml new file mode 100644 index 0000000..5dc40a0 --- /dev/null +++ b/.github/workflows/bump-and-pr.yml @@ -0,0 +1,72 @@ +name: "Bump ⬆️ library version and create PR" +on: + workflow_dispatch: + inputs: + bumpType: + description: "Bump type" + required: true + default: "patch" + type: choice + options: + - patch + - minor + - major + +env: + PYTHON_VERSION: 3.11 + +jobs: + bump-n-pr: + name: Bump ⬆️ and create a Pull Request with a new library version (${{ inputs.bumpType }}) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.CICD_PAT_TOKEN }} + - name: Configure Git user + run: | + git config --local user.name "Kraina CI/CD" + git config --local user.email "cicd@kraina.ai" + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + - uses: pdm-project/setup-pdm@v3 + name: Setup PDM + with: + 
python-version: ${{ env.PYTHON_VERSION }} + architecture: x64 + - name: Install dependencies + run: pdm install -d -G dev --skip=post_install + - name: Bump changelog version + id: changelog + uses: release-flow/keep-a-changelog-action@v2 + with: + command: bump + version: ${{ inputs.bumpType }} + tag-prefix: "" + keep-unreleased-section: True + - name: Run bumpver + run: | + pdm run bumpver update --allow-dirty --${{ inputs.bumpType }} -vvv + - name: Get commit message + id: commit_message + run: | + echo COMMIT_MESSAGE="$(git log -1 --pretty=%B | cat)" >> $GITHUB_OUTPUT + - name: Get new version + id: new_version + run: | + pdm run bumpver show -n -e | grep CURRENT_VERSION >> $GITHUB_OUTPUT + - name: Create Pull Request + uses: peter-evans/create-pull-request@v5 + with: + title: ${{ steps.commit_message.outputs.COMMIT_MESSAGE }} + branch: ${{ format('release/{0}', steps.new_version.outputs.CURRENT_VERSION) }} + labels: release + base: main + body: ${{ steps.changelog.outputs.release-notes }} + token: ${{ secrets.CICD_PAT_TOKEN }} + author: Kraina CI/CD + committer: Kraina CI/CD + commit-message: "docs: update CHANGELOG.md" diff --git a/.github/workflows/ci-dev.yml b/.github/workflows/ci-dev.yml new file mode 100644 index 0000000..3318d95 --- /dev/null +++ b/.github/workflows/ci-dev.yml @@ -0,0 +1,129 @@ +name: "Build & publish - DEV" +on: + workflow_run: + workflows: [Test - DEV] + types: + - completed + +jobs: + build-n-publish: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + name: Build and publish Python 🐍 distributions πŸ“¦ to TestPyPI + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - uses: pdm-project/setup-pdm@v3 + name: Setup PDM + with: + python-version: 3.11 + architecture: x64 + enable-pep582: true + cache: true + cache-dependency-path: "**/pdm.lock" + - run: pip install toml + - uses: jannekem/run-python-script-action@v1 + name: Rename test version + with: + script: | + import time + import toml + data = toml.load("pyproject.toml") + current_date = time.strftime("%Y%m%d%H%M%S") + data['project']['version']=f"{data['project']['version']}-{current_date}" + data['tool']['bumpver']['current_version']=f"{data['tool']['bumpver']['current_version']}-{current_date}" + f = open("pyproject.toml",'w') + toml.dump(data, f) + f.close() + - name: Publish distribution πŸ“¦ to Test PyPI + run: pdm publish --repository testpypi --username __token__ --password ${{ secrets.PYPI_TEST_API_TOKEN }} --comment ${{ github.sha }} --verbose + + generate-docs: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + name: Generate latest library πŸ“š documentation πŸ“„ + runs-on: ubuntu-latest + permissions: + contents: write + env: + MKDOCS_EXECUTE_JUPYTER: false # execution is done before rendering documentation + MKDOCS_DEV: true + steps: + - uses: actions/checkout@v3 + with: + token: ${{ secrets.CICD_PAT_TOKEN }} + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-dev-${{ hashFiles('**/pdm.lock') }} + restore-keys: | + ${{ runner.os }}-pip-dev- + - name: Install pdm + run: pip install pdm + - name: Generate requirements.txt + run: pdm export --no-default -G docs -G visualization -f requirements -o requirements.txt + - name: Install dependencies + run: pip install --no-deps -r requirements.txt + - name: Install nbconvert dependency + run: pip install 
jupyter nbconvert nbformat black + - name: Install quackosm + run: | + pdm build -v -d dist + pip install 'quackosm @ file://'"$(pwd)/$(find dist -name '*.whl')" + - name: Configure Git user + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + - name: Cache Overpass data + uses: actions/cache@v3 + with: + path: "**/cache" + key: mkdocs-overpass-dev-cache-${{ runner.os }} + - name: Execute jupyter notebooks + run: | + jupyter nbconvert --to notebook --inplace --execute $(find examples/ -type f -name "*.ipynb") --ExecutePreprocessor.kernel_name='python3' + - uses: jannekem/run-python-script-action@v1 + name: Replace copyright date + with: + script: | + import time + file_name = "mkdocs.yml" + with open(file_name) as f: + data = f.read().replace('{current_year}', time.strftime("%Y")) + with open(file_name, "w") as f: + f.write(data) + - name: Create remote for quackosm-docs repository + run: git remote add origin-quackosm-docs https://github.com/kraina-ai/quackosm-docs + - name: Fetch gh-pages branch + run: git fetch origin-quackosm-docs gh-pages --depth=1 + - name: Publish dev documentation + run: mike deploy --remote origin-quackosm-docs --rebase --push dev + + deploy-docs: + name: Deploy documentation πŸ“„ to Github Pages 🌐 + needs: [generate-docs] + runs-on: ubuntu-latest + permissions: + contents: write + pages: write + id-token: write + steps: + - uses: actions/checkout@v3 + with: + repository: 'kraina-ai/quackosm-docs' + ref: 'gh-pages' + token: ${{ secrets.CICD_PAT_TOKEN }} + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + # Upload entire repository + path: '.' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.github/workflows/ci-prod.yml b/.github/workflows/ci-prod.yml new file mode 100644 index 0000000..2ec8a80 --- /dev/null +++ b/.github/workflows/ci-prod.yml @@ -0,0 +1,119 @@ +name: "Build & publish - PROD" +on: + release: + types: [released] + +jobs: + run-tests: + name: "Run tests job" + uses: ./.github/workflows/_tests.yml + + build-n-publish: + name: Build and publish Python 🐍 distributions πŸ“¦ to PyPI + needs: [run-tests] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - uses: pdm-project/setup-pdm@v3 + name: Setup PDM + with: + python-version: 3.11 + architecture: x64 + enable-pep582: true + cache: true + cache-dependency-path: "**/pdm.lock" + - name: Publish distribution πŸ“¦ to PyPI + run: pdm publish --repository pypi --username __token__ --password ${{ secrets.PYPI_API_TOKEN }} --verbose + + generate-docs: + name: Generate latest library πŸ“š documentation πŸ“„ + needs: [run-tests] + runs-on: ubuntu-latest + permissions: + contents: write + pages: write + id-token: write + env: + MKDOCS_EXECUTE_JUPYTER: false # execution is done before rendering documentation + MKDOCS_DEV: false + steps: + - uses: actions/checkout@v3 + with: + token: ${{ secrets.CICD_PAT_TOKEN }} + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-prod-${{ hashFiles('**/pdm.lock') }} + restore-keys: | + ${{ runner.os }}-pip-prod- + - name: Install pdm + run: pip install pdm + - name: Generate requirements.txt + run: pdm export --no-default -G docs -G visualization -f requirements -o requirements.txt + - name: Install 
dependencies + run: pip install --no-deps -r requirements.txt + - name: Install nbconvert dependency + run: pip install jupyter nbconvert nbformat black + - name: Install quackosm + run: | + pdm build -v -d dist + pip install 'quackosm @ file://'"$(pwd)/$(find dist -name '*.whl')" + - name: Configure Git user + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + - name: Cache Overpass data + uses: actions/cache@v3 + with: + path: "**/cache" + key: mkdocs-overpass-dev-cache-${{ runner.os }} + - name: Execute jupyter notebooks + run: | + jupyter nbconvert --to notebook --inplace --execute $(find examples/ -type f -name "*.ipynb") --ExecutePreprocessor.kernel_name='python3' + - uses: jannekem/run-python-script-action@v1 + name: Replace copyright date + with: + script: | + import time + file_name = "mkdocs.yml" + with open(file_name) as f: + data = f.read().replace('{current_year}', time.strftime("%Y")) + with open(file_name, "w") as f: + f.write(data) + - name: Create remote for quackosm-docs repository + run: git remote add origin-quackosm-docs https://github.com/kraina-ai/quackosm-docs + - name: Fetch gh-pages branch + run: git fetch origin-quackosm-docs gh-pages --depth=1 + - name: Publish latest documentation + run: mike deploy --remote origin-quackosm-docs --rebase --push --update-aliases ${{github.ref_name}} latest + + deploy-docs: + name: Deploy documentation πŸ“„ to Github Pages 🌐 + needs: [generate-docs] + runs-on: ubuntu-latest + permissions: + contents: write + pages: write + id-token: write + steps: + - uses: actions/checkout@v3 + with: + repository: 'kraina-ai/quackosm-docs' + ref: 'gh-pages' + token: ${{ secrets.CICD_PAT_TOKEN }} + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + # Upload entire repository + path: '.' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.github/workflows/gh-release.yml b/.github/workflows/gh-release.yml new file mode 100644 index 0000000..26a9a4a --- /dev/null +++ b/.github/workflows/gh-release.yml @@ -0,0 +1,51 @@ +name: "GitHub release" +on: + workflow_run: + workflows: [Test - DEV] + types: + - completed + +permissions: + actions: 'write' + +jobs: + github-release: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + name: Create a GitHub release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.CICD_PAT_TOKEN }} + - name: Get last commit message + id: get-last-commit-message + run: echo COMMIT_MESSAGE="$(git log -1 --pretty=%B | cat | head -n 1)" >> "$GITHUB_OUTPUT" + - name: Cancel run if not a release commit + run: | + # done this way instead of "exit 1" to avoid having a failed run in the logs + IS_RELEASE_COMMIT=${{ contains(steps.get-last-commit-message.outputs.COMMIT_MESSAGE, format('chore(CI/CD){0} bump version', ':')) }} + if ! 
$IS_RELEASE_COMMIT; then + gh run cancel ${{ github.run_id }} + gh run watch ${{ github.run_id }} + fi + env: + GH_TOKEN: ${{ secrets.CICD_PAT_TOKEN }} + - name: Configure Git user + run: | + git config --local user.name "Kraina CI/CD" + git config --local user.email "cicd@kraina.ai" + - name: Extract release notes + id: extract-release-notes + uses: ffurrer2/extract-release-notes@v1 + - name: Extract version + id: extract-version + uses: winterjung/split@v2 + with: + msg: ${{ steps.get-last-commit-message.outputs.COMMIT_MESSAGE }} + - name: Release + uses: softprops/action-gh-release@v1 + with: + token: ${{ secrets.CICD_PAT_TOKEN }} + tag_name: ${{ steps.extract-version.outputs._5 }} + body: ${{ steps.extract-release-notes.outputs.release_notes }} diff --git a/.github/workflows/run-changelog-enforcer.yml b/.github/workflows/run-changelog-enforcer.yml new file mode 100644 index 0000000..2675553 --- /dev/null +++ b/.github/workflows/run-changelog-enforcer.yml @@ -0,0 +1,22 @@ +name: "Run changelog enforcer" +on: + pull_request: + # The specific activity types are listed here to include "labeled" and "unlabeled" + # (which are not included by default for the "pull_request" trigger). + # This is needed to allow skipping enforcement of the changelog in PRs with specific labels, + # as defined in the (optional) "skipLabels" property. + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + run-changelog-enforcer: + name: Run changelog enforcer + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dangoslen/changelog-enforcer@v3 diff --git a/.github/workflows/run-manual-pre-commit.yml b/.github/workflows/run-manual-pre-commit.yml new file mode 100644 index 0000000..5ae013b --- /dev/null +++ b/.github/workflows/run-manual-pre-commit.yml @@ -0,0 +1,23 @@ +name: "Run manual pre-commit stage" +on: + pull_request: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + run-manual-stage: + name: Run pre-commit manual stage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - uses: pre-commit/action@v3.0.0 + with: + extra_args: --all-files --hook-stage manual --verbose diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 0000000..fcaeb3e --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,14 @@ +name: "Run tests workflow" +on: + pull_request: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + run-tests: + name: "Run tests job" + uses: ./.github/workflows/_tests.yml diff --git a/.github/workflows/test-dev.yml b/.github/workflows/test-dev.yml new file mode 100644 index 0000000..05d6eec --- /dev/null +++ b/.github/workflows/test-dev.yml @@ -0,0 +1,10 @@ +name: "Test - DEV" +on: + push: + branches: + - main + +jobs: + run-tests: + name: "Run tests job" + uses: ./.github/workflows/_tests.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f3f566..4489ac8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,10 +12,6 @@ repos: hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - - repo: 
https://github.com/psf/black - rev: 23.7.0 - hooks: - - id: black-jupyter - repo: https://github.com/PyCQA/docformatter rev: v1.7.5 hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9afa765 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.0] - 2023-12-28 + +### Added + +- Created QuackOSM repository +- Implemented PbfFileReader + +[Unreleased]: https://github.com/kraina-ai/quackosm/compare/0.1.0...HEAD + +[0.1.0]: https://github.com/kraina-ai/quackosm/releases/tag/0.1.0 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7afc24d --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +SHELL := /bin/bash + +install: + pdm install -dG:all + +docs: + mkdocs serve --livereload -w quackosm + +test: + pytest -n auto + +.PHONY: install docs test diff --git a/docs/.pages b/docs/.pages new file mode 100644 index 0000000..ea746d5 --- /dev/null +++ b/docs/.pages @@ -0,0 +1,4 @@ +nav: + - Home: README.md + - Examples: examples + - ... diff --git a/docs/api/PbfFileReader.md b/docs/api/PbfFileReader.md new file mode 100644 index 0000000..c91509d --- /dev/null +++ b/docs/api/PbfFileReader.md @@ -0,0 +1,4 @@ +# PbfFileReader +::: quackosm.pbf_file_reader.PbfFileReader + options: + show_root_heading: true diff --git a/docs/api/QuackOSM.md b/docs/api/QuackOSM.md new file mode 100644 index 0000000..2ca4898 --- /dev/null +++ b/docs/api/QuackOSM.md @@ -0,0 +1,4 @@ +# QuackOSM +::: quackosm.functions + options: + show_root_heading: false diff --git a/docs/assets/css/docstrings.css b/docs/assets/css/docstrings.css new file mode 100644 index 0000000..9d596fc --- /dev/null +++ b/docs/assets/css/docstrings.css @@ -0,0 +1,61 @@ +.doc-class > .doc-heading-code, +.doc-function > .doc-heading-code { + font-size: 1rem !important; + margin-top: -1em !important; + display: flex; +} + +.doc-class > .doc-heading-code .highlight, +.doc-function > .doc-heading-code .highlight { + flex-grow: 1; +} + +.doc-class > .doc-heading-code a, +.doc-function > .doc-heading-code a { + margin-top: auto; + margin-bottom: auto; +} + +.doc-class > .doc-labels, +.doc-function > .doc-labels { + margin-top: -1.2rem; + display: block; + font-size: 1.2em; +} + +/* Indentation. */ +div.doc-contents:not(.first) { + padding-left: 25px; + border-left: 0.05rem solid var(--md-typeset-table-color); +} + +/* Mark external links as such. 
*/ +a.external::after, +a.autorefs-external::after { + /* https://primer.style/octicons/arrow-up-right-24 */ + mask-image: url('data:image/svg+xml,'); + -webkit-mask-image: url('data:image/svg+xml,'); + content: " "; + + display: inline-block; + vertical-align: middle; + position: relative; + + height: 1em; + width: 1em; + background-color: var(--md-typeset-a-color); +} + +a.external:hover::after, +a.autorefs-external:hover::after { + background-color: var(--md-accent-fg-color); +} + +table.highlighttable td.linenos, +span.linenos, +.highlight .gp, .highlight .go { + user-select: none; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; +} diff --git a/docs/assets/css/font.css b/docs/assets/css/font.css new file mode 100644 index 0000000..61a2cc1 --- /dev/null +++ b/docs/assets/css/font.css @@ -0,0 +1,8 @@ +@import url("https://fonts.googleapis.com/css2?family=Playpen+Sans:wght@500&display=swap"); + +.md-header__topic:first-child { + font-family: "Playpen Sans"; + font-weight: 500; + font-size: 1.1rem; + color: var(--quackosm-text-color); +} diff --git a/docs/assets/css/jupyter.css b/docs/assets/css/jupyter.css new file mode 100644 index 0000000..26d3fd6 --- /dev/null +++ b/docs/assets/css/jupyter.css @@ -0,0 +1,53 @@ +.jp-CodeCell > .jp-Cell-inputWrapper { + direction: rtl; + width: 113%; +} + +.jp-InputArea-prompt { + visibility: hidden; +} + +.jp-OutputArea-prompt { + visibility: hidden; + background-color: red; + position: absolute; + right: 0; +} + +.jp-CodeCell > .jp-Cell-outputWrapper { + margin-top: -10px; + padding-top: 0; + display: table-cell; + text-align: left; +} +.jp-Cell-outputWrapper > .jp-Cell-outputCollapser { + margin-top: -17px; +} + +.jupyter-wrapper table.dataframe tr, +.jupyter-wrapper table.dataframe th, +.jupyter-wrapper table.dataframe td { + text-align: left; +} +.jupyter-wrapper table.dataframe { + table-layout: auto; +} + +.jp-RenderedImage.jp-OutputArea-output { + text-align: center; +} + +.jupyter-wrapper .zeroclipboard-container { + z-index: 3 !important; +} + +div.jp-RenderedText.jp-OutputArea-output.jp-OutputArea-executeResult > pre { + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, + "Liberation Mono", "Courier New", monospace, SFMono-Regular, Consolas, Menlo, + monospace; + white-space: pre; +} + +.jupyter-wrapper .jp-Notebook { + overflow: hidden !important; +} diff --git a/docs/assets/css/logo.css b/docs/assets/css/logo.css new file mode 100644 index 0000000..461d02b --- /dev/null +++ b/docs/assets/css/logo.css @@ -0,0 +1,7 @@ +.md-header__button.md-logo { + padding: 0.1rem; +} + +.md-header__button.md-logo img, .md-header__button.md-logo svg { + height: 1.8rem; +} diff --git a/docs/assets/css/palette.css b/docs/assets/css/palette.css new file mode 100644 index 0000000..c02f0f3 --- /dev/null +++ b/docs/assets/css/palette.css @@ -0,0 +1,7 @@ +:root > * { + --quackosm-brand-color: #00509d; + --quackosm-text-color: #ffd500; + --md-primary-fg-color: var(--quackosm-brand-color); + --md-primary-fg-color--light: var(--quackosm-brand-color); + --md-primary-fg-color--dark: var(--quackosm-brand-color); +} diff --git a/docs/assets/logos/favicon.ico b/docs/assets/logos/favicon.ico new file mode 100644 index 0000000..3d32214 Binary files /dev/null and b/docs/assets/logos/favicon.ico differ diff --git a/docs/copy_examples.py b/docs/copy_examples.py new file mode 100644 index 0000000..0455408 --- /dev/null +++ b/docs/copy_examples.py @@ -0,0 +1,33 @@ +"""Examples copying utility function.""" + +from pathlib import 
Path + +import mkdocs_gen_files + +EXAMPLES_DIRECTORY_PATH = Path("examples") + + +def write_file(file_path: Path) -> None: + """ + Copies file from examples directory into mkdocs scope. + + Args: + file_path (Path): Current file path. + """ + root_path = file_path.relative_to(".") + print(f"Copying {root_path} file to {root_path}") + with root_path.open("rb") as src, mkdocs_gen_files.open(root_path, "wb") as dst: + dst.write(src.read()) + + +banned_directories = ["cache", "files", "example_files", "__pycache__", "lightning_logs"] +for i in EXAMPLES_DIRECTORY_PATH.glob("**/*"): + if i.is_file(): + should_copy = True + for banned_directory in banned_directories: + if banned_directory in i.parts: + should_copy = False + break + + if should_copy: + write_file(i) diff --git a/docs/copy_readme.py b/docs/copy_readme.py new file mode 100644 index 0000000..c22b6f9 --- /dev/null +++ b/docs/copy_readme.py @@ -0,0 +1,8 @@ +"""Readme copying utility function.""" + +from pathlib import Path + +import mkdocs_gen_files + +with Path("README.md").open("rb") as src, mkdocs_gen_files.open("README.md", "wb") as dst: + dst.write(src.read()) diff --git a/docs/gen_ref_pages.py b/docs/gen_ref_pages.py new file mode 100644 index 0000000..951a934 --- /dev/null +++ b/docs/gen_ref_pages.py @@ -0,0 +1,41 @@ +"""Generate the code reference pages.""" + +import os +from pathlib import Path + +import mkdocs_gen_files + +FULL_API_DIRECTORY_PATH = Path("full_api") + +nav = mkdocs_gen_files.Nav() + +is_dev = os.getenv("MKDOCS_DEV", "true").lower() == "true" + +if is_dev: + for path in sorted(Path("quackosm").rglob("*.py")): + module_path = path.relative_to("quackosm").with_suffix("") + doc_path = path.relative_to("quackosm").with_suffix(".md") + full_doc_path = Path(FULL_API_DIRECTORY_PATH, doc_path) + + parts = list(module_path.parts) + + if parts[-1] == "__init__": + parts = parts[:-1] + doc_path = doc_path.with_name("index.md") + full_doc_path = full_doc_path.with_name("index.md") + elif parts[-1] == "__main__": + continue + + if not parts: + continue + + nav[parts] = doc_path.as_posix() + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + identifier = ".".join(parts) + print(f"::: {identifier}", file=fd) + + mkdocs_gen_files.set_edit_path(full_doc_path, path) + + with mkdocs_gen_files.open(f"{FULL_API_DIRECTORY_PATH}/README.md", "w") as nav_file: + nav_file.writelines(nav.build_literate_nav()) diff --git a/docs/javascripts/copy_to_clipboard_patch.js b/docs/javascripts/copy_to_clipboard_patch.js new file mode 100644 index 0000000..8a031ee --- /dev/null +++ b/docs/javascripts/copy_to_clipboard_patch.js @@ -0,0 +1,38 @@ +const SELECTABILITY_PROPERTIES = [ + "user-select", + "-webkit-user-select", + "-ms-user-select", + "-moz-user-select" +]; + +document$.subscribe(function () { + makeButtonsCopySelectableOnly(); +}) + +function makeButtonsCopySelectableOnly() { + const buttonsToFix = document.querySelectorAll(".highlight button.md-clipboard"); + buttonsToFix.forEach((button) => { + button.dataset.clipboardText = extractText(button.dataset.clipboardTarget); + }); +} + +function extractText(selector) { + const element = document.querySelector(selector); + return Array.from(element.childNodes) + .filter(child => includeInOutput(child)) + .map(child => child.textContent) + .join("") + .trimEnd(); +} + +function includeInOutput(element) { + if (element instanceof Element) { + return isSelectable(element); + } + return true; +} + +function isSelectable(element) { + const childStyle = window.getComputedStyle(element); + 
return !SELECTABILITY_PROPERTIES.some((prop) => childStyle.getPropertyValue(prop) == "none"); +} diff --git a/docs/templates/python/material/class.html b/docs/templates/python/material/class.html new file mode 100644 index 0000000..e7907d9 --- /dev/null +++ b/docs/templates/python/material/class.html @@ -0,0 +1,140 @@ +{{ log.debug("Rendering " + class.path) }} + +
+{% with obj = class, html_id = class.path %} + + {% if root %} + {% set show_full_path = config.show_root_full_path %} + {% set root_members = True %} + {% elif root_members %} + {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} + {% set root_members = False %} + {% else %} + {% set show_full_path = config.show_object_full_path %} + {% endif %} + + {% set class_name = class.path if show_full_path else class.name %} + + {% set heading_classes = "doc doc-heading" if config.separate_signature else "doc doc-heading-code" %} + + {% if not root or config.show_root_heading %} + + {% filter heading(heading_level, + role="class", + id=html_id, + class=heading_classes, + toc_label=class.name) %} + + {% block heading scoped %} + {% if config.separate_signature %} + {{ class_name }} + {% elif config.merge_init_into_class and "__init__" in class.all_members %} + {% with function = class.all_members["__init__"] %} + {%+ filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %} + {{ class_name }} + {% endfilter %} + {% endwith %} + {% else %} + {{ class_name|highlight(language="python", linenums=False) }} + {% endif %} + {% endblock heading %} + + {% endfilter %} + + {% block labels scoped %} + {% with labels = class.labels %} + {% include "labels.html" with context %} + {% endwith %} + {% endblock labels %} + + {% block signature scoped %} + {% if config.separate_signature and config.merge_init_into_class %} + {% if "__init__" in class.all_members %} + {% with function = class.all_members["__init__"] %} + {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %} + {{ class.name }} + {% endfilter %} + {% endwith %} + {% endif %} + {% endif %} + {% endblock signature %} + + {% else %} + {% if config.show_root_toc_entry %} + {% filter heading(heading_level, + role="class", + id=html_id, + toc_label=class.name, + hidden=True) %} + {% endfilter %} + {% endif %} + {% set heading_level = heading_level - 1 %} + {% endif %} + +
+ {% block contents scoped %} + {% block bases scoped %} + {% if config.show_bases and class.bases %} +

+ Bases: {% for expression in class.bases -%} + {% include "expression.html" with context %}{% if not loop.last %}, {% endif %} + {% endfor -%} +

+ {% endif %} + {% endblock bases %} + + {% block docstring scoped %} + {% with docstring_sections = class.docstring.parsed %} + {% include "docstring.html" with context %} + {% endwith %} + {% if config.merge_init_into_class %} + {% if "__init__" in class.all_members and class.all_members["__init__"].has_docstring %} + {% with docstring_sections = class.all_members["__init__"].docstring.parsed %} + {% include "docstring.html" with context %} + {% endwith %} + {% endif %} + {% endif %} + {% endblock docstring %} + + {% block source scoped %} + {% if config.show_source %} + {% if config.merge_init_into_class %} + {% if "__init__" in class.all_members and class.all_members["__init__"].source %} + {% with init = class.all_members["__init__"] %} +
+ Source code in + {%- if init.relative_filepath.is_absolute() -%} + {{ init.relative_package_filepath }} + {%- else -%} + {{ init.relative_filepath }} + {%- endif -%} + + {{ init.source|highlight(language="python", linestart=init.lineno, linenums=True) }} +
+ {% endwith %} + {% endif %} + {% elif class.source %} +
+ Source code in + {%- if class.relative_filepath.is_absolute() -%} + {{ class.relative_package_filepath }} + {%- else -%} + {{ class.relative_filepath }} + {%- endif -%} + + {{ class.source|highlight(language="python", linestart=class.lineno, linenums=True) }} +
+ {% endif %} + {% endif %} + {% endblock source %} + + {% block children scoped %} + {% set root = False %} + {% set heading_level = heading_level + 1 %} + {% include "children.html" with context %} + {% endblock children %} + {% endblock contents %} +
+ +{% endwith %} +
diff --git a/docs/templates/python/material/function.html b/docs/templates/python/material/function.html new file mode 100644 index 0000000..be7c49c --- /dev/null +++ b/docs/templates/python/material/function.html @@ -0,0 +1,94 @@ +{{ log.debug("Rendering " + function.path) }} + +{% import "language.html" as lang with context %} + +
+{% with obj = function, html_id = function.path %} + + {% if root %} + {% set show_full_path = config.show_root_full_path %} + {% set root_members = True %} + {% elif root_members %} + {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} + {% set root_members = False %} + {% else %} + {% set show_full_path = config.show_object_full_path %} + {% endif %} + + {% set function_name = function.path if show_full_path else function.name %} + + {% set heading_classes = "doc doc-heading" if config.separate_signature else "doc doc-heading-code" %} + + {% if not root or config.show_root_heading %} + + {% filter heading(heading_level, + role="function", + id=html_id, + class=heading_classes, + toc_label=function.name ~ "()") %} + + {% block heading scoped %} + {% if config.separate_signature %} + {{ function_name }} + {% else %} + {%+ filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %} + {{ function_name }} + {% endfilter %} + {% endif %} + {% endblock heading %} + + {% endfilter %} + + {% block labels scoped %} + {% with labels = function.labels %} + {% include "labels.html" with context %} + {% endwith %} + {% endblock labels %} + + {% block signature scoped %} + {% if config.separate_signature %} + {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %} + {{ function.name }} + {% endfilter %} + {% endif %} + {% endblock signature %} + + {% else %} + {% if config.show_root_toc_entry %} + {% filter heading(heading_level, + role="function", + id=html_id, + toc_label=function.name, + hidden=True) %} + {% endfilter %} + {% endif %} + {% set heading_level = heading_level - 1 %} + {% endif %} + +
+ {% block contents scoped %} + {% block docstring scoped %} + {% with docstring_sections = function.docstring.parsed %} + {% include "docstring.html" with context %} + {% endwith %} + {% endblock docstring %} + + {% block source scoped %} + {% if config.show_source and function.source %} +
+ {{ lang.t("Source code in") }} + {%- if function.relative_filepath.is_absolute() -%} + {{ function.relative_package_filepath }} + {%- else -%} + {{ function.relative_filepath }} + {%- endif -%} + + {{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }} +
+ {% endif %} + {% endblock source %} + {% endblock contents %} +
+ +{% endwith %} +
diff --git a/examples/pbf_file_reader.ipynb b/examples/pbf_file_reader.ipynb new file mode 100644 index 0000000..8c433c8 --- /dev/null +++ b/examples/pbf_file_reader.ipynb @@ -0,0 +1,341 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PBF File Reader\n", + "\n", + "`PbfFileReader` can quickly parse a full OSM extract in the form of an `*.osm.pbf` file.\n", + "\n", + "It uses `DuckDB` with the `spatial` extension to convert `pbf` files into `geoparquet` files without a GDAL dependency.\n", + "\n", + "The reader can filter objects by geometry and by OSM tags, with an option to split tags into columns or keep them in a single dictionary.\n", + "\n", + "A caching strategy is implemented to reduce computations, but it can be overridden using the `ignore_cache` parameter." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download all buildings in ReykjavΓ­k, Iceland\n", + "\n", + "Filtering the data by geometry and by tags, with tags in exploded form" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from quackosm import PbfFileReader\n", + "import urllib.request\n", + "import osmnx as ox" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iceland_pbf_url = \"https://download.geofabrik.de/europe/iceland-latest.osm.pbf\"\n", + "iceland_pbf_file = \"iceland.osm.pbf\"\n", + "urllib.request.urlretrieve(iceland_pbf_url, iceland_pbf_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "reykjavik_gdf = ox.geocode_to_gdf(\"ReykjavΓ­k, IS\")\n", + "reykjavik_gdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "reader = PbfFileReader(\n", + " geometry_filter=reykjavik_gdf.geometry.iloc[0], tags_filter={\"building\": True}\n", + ")\n", + "\n", + "reykjavik_buildings_gpq = reader.convert_pbf_to_gpq(\"iceland.osm.pbf\")\n", + "reykjavik_buildings_gpq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read those features using DuckDB" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import duckdb\n", + "\n", + "connection = duckdb.connect()\n", + "\n", + "connection.load_extension(\"parquet\")\n", + "connection.load_extension(\"spatial\")\n", + "\n", + "features_relation = connection.read_parquet(str(reykjavik_buildings_gpq)).project(\n", + " \"* REPLACE (ST_GeomFromWKB(geometry) AS geometry)\"\n", + ")\n", + "features_relation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Count all buildings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features_relation.count(\"feature_id\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download main roads for Estonia\n", + "Filtering the data only by tags, with tags in exploded form" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "highways_filter = {\n", + " \"highway\": [\n", + " \"motorway\",\n", + " \"trunk\",\n", + " \"primary\",\n", + " \"secondary\",\n", + " \"tertiary\",\n", + " ]\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "estonia_pbf_url = 
\"http://download.geofabrik.de/europe/estonia-latest.osm.pbf\"\n", + "estonia_pbf_file = \"estonia.osm.pbf\"\n", + "urllib.request.urlretrieve(estonia_pbf_url, estonia_pbf_file)\n", + "\n", + "reader = PbfFileReader(geometry_filter=None, tags_filter=highways_filter)\n", + "estonia_features_gpq = reader.convert_pbf_to_gpq(estonia_pbf_file)\n", + "estonia_features_gpq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features_relation = connection.read_parquet(str(estonia_features_gpq)).project(\n", + " \"* REPLACE (ST_GeomFromWKB(geometry) AS geometry)\"\n", + ")\n", + "features_relation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Count loaded roads" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features_relation.count(\"feature_id\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculate roads length\n", + "We will transform the geometries to the Estonian CRS - [EPSG:3301](https://epsg.io/3301)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "length_in_meters = (\n", + " features_relation.project(\n", + " \"ST_Length(ST_Transform(geometry, 'EPSG:4326', 'EPSG:3301')) AS road_length\"\n", + " )\n", + " .sum(\"road_length\")\n", + " .fetchone()[0]\n", + ")\n", + "length_in_km = length_in_meters / 1000\n", + "length_in_km" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the roads using GeoPandas\n", + "\n", + "With fast loading of geoparquet files using the `geoarrow.pyarrow` library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import geoarrow.pyarrow as ga\n", + "from geoarrow.pyarrow import io\n", + "\n", + "from quackosm._constants import GEOMETRY_COLUMN\n", + "\n", + "parquet_table = io.read_geoparquet_table(estonia_features_gpq)\n", + "ga.to_geopandas(parquet_table.column(GEOMETRY_COLUMN)).plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download all data for Liechtenstein\n", + "Without filtering, with tags in a compact form" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "liechtenstein_pbf_url = \"https://download.geofabrik.de/europe/liechtenstein-latest.osm.pbf\"\n", + "liechtenstein_pbf_file = \"liechtenstein.osm.pbf\"\n", + "urllib.request.urlretrieve(liechtenstein_pbf_url, liechtenstein_pbf_file)\n", + "\n", + "# Here explode_tags is set to False explicitly,\n", + "# but it would be set automatically when not filtering the data\n", + "reader = PbfFileReader(geometry_filter=None, tags_filter=None)\n", + "liechtenstein_features_gpq = reader.convert_pbf_to_gpq(\n", + " liechtenstein_pbf_file, explode_tags=False\n", + ") \n", + "liechtenstein_features_gpq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features_relation = connection.read_parquet(str(liechtenstein_features_gpq)).project(\n", + " \"* REPLACE (ST_GeomFromWKB(geometry) AS geometry)\"\n", + ")\n", + "features_relation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Return data as GeoDataFrame\n", + "\n", + "`PbfFileReader` can also return the data in the GeoDataFrame form.\n", + "\n", + "Here the caching strategy will be utilized - the file won't be transformed 
again." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features_gdf = reader.get_features_gdf(liechtenstein_pbf_file)\n", + "features_gdf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the forests using GeoPandas\n", + "\n", + "Filter all polygons and features with `landuse`=`forest`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features_gdf[\n", + " features_gdf.geom_type.isin((\"Polygon\", \"MultiPolygon\"))\n", + " & features_gdf.tags.apply(lambda x: \"landuse\" in x and x[\"landuse\"] == \"forest\")\n", + "].plot(color=\"green\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "vscode": { + "interpreter": { + "hash": "4153976b658cb8b76d04b10dc7a0c871c2dac1d3dcfe690ad61d83a61969a12e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..dc723cc --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,138 @@ +site_name: QuackOSM +site_url: null + +repo_url: https://github.com/kraina-ai/quackosm +repo_name: kraina-ai/quackosm + +edit_uri: "edit/main/docs/" + +watch: + - examples + + +theme: + name: material + features: + - navigation.tabs + - navigation.tabs.sticky + - navigation.instant + - navigation.tracking + - navigation.top + - navigation.indexes + - navigation.path + - toc.follow + - content.code.annotate + - content.action.edit + - content.action.view + - content.code.copy + favicon: assets/logos/favicon.ico + logo: assets/logos/quackosm_logo.png + icon: + repo: material/github + palette: + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: blue grey + accent: amber + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: blue grey + accent: amber + toggle: + icon: material/brightness-4 + name: Switch to light mode + +extra: + consent: + title: Cookie consent + description: >- + We use cookies to recognize your repeated visits and preferences, as well + as to measure the effectiveness of our documentation and whether users + find what they're searching for. With your consent, you're helping us to + make our documentation better. 
+ cookies: + github: + name: Github + checked: true + actions: + - accept + - manage + version: + provider: mike + +extra_css: + - assets/css/docstrings.css + - assets/css/font.css + - assets/css/palette.css + - assets/css/jupyter.css + - assets/css/logo.css + +extra_javascript: + - javascripts/copy_to_clipboard_patch.js + +copyright: > + Copyright © 2022 - {current_year} Kraina AI – + Change cookie settings + +plugins: + - mike: + canonical_version: "latest" + version_selector: true + - gen-files: + scripts: + - docs/copy_readme.py + - docs/copy_examples.py + - docs/gen_ref_pages.py + - search + - mkdocstrings: + custom_templates: docs/templates + handlers: + python: + paths: [quackosm] + options: + show_source: true + show_root_heading: false + show_root_toc_entry: false + docstring_section_style: "spacy" + show_signature: true + show_signature_annotations: false + line_length: 60 + members_order: "source" + docstring_options: + ignore_init_summary: true + merge_init_into_class: true + inherited_members: true + show_root_full_path: true + show_root_members_full_path: false + show_object_full_path: false + - mkdocs-jupyter: + include: ["*.ipynb"] + ignore_h1_titles: true + execute: !ENV [MKDOCS_EXECUTE_JUPYTER, false] + include_source: false + allow_errors: false + - mermaid2 + - awesome-pages + +markdown_extensions: + - attr_list + - md_in_html + - toc: + permalink: true + - pymdownx.magiclink + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets: + check_paths: true + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format diff --git a/pdm.lock b/pdm.lock index a372424..b9f100d 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "docs", "license", "lint", "test"] strategy = ["cross_platform"] lock_version = "4.4" -content_hash = "sha256:8b580cb87bcb1ff3f6ec3c600c12d4d32541b469d44632e94d43c6e8354755ba" +content_hash = "sha256:1f9ff663384a9d72847cf8a5b920dc5bba48999dd5d1ac4b1e01202b6995ae15" [[package]] name = "appnope" @@ -61,6 +61,41 @@ files = [ {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, ] +[[package]] +name = "black" +version = "23.12.1" +requires_python = ">=3.8" +summary = "The uncompromising code formatter." 
+dependencies = [ + "click>=8.0.0", + "mypy-extensions>=0.4.3", + "packaging>=22.0", + "pathspec>=0.9.0", + "platformdirs>=2", + "tomli>=1.1.0; python_version < \"3.11\"", + "typing-extensions>=4.0.1; python_version < \"3.11\"", +] +files = [ + {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, + {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, + {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, + {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, + {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, + {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, + {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, + {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, + {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, + {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, + {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, + {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, + {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, + {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, + {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, + {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, + {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, + {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, +] + [[package]] name = "bleach" version = "6.1.0" @@ -343,6 +378,71 @@ files = [ {file = "comm-0.2.0.tar.gz", hash = "sha256:a517ea2ca28931c7007a7a99c562a0fa5883cfb48963140cf642c41c948498be"}, ] +[[package]] +name = "contourpy" +version = "1.2.0" +requires_python = ">=3.9" +summary = "Python library for calculating contours of 2D quadrilateral grids" +dependencies = [ + "numpy<2.0,>=1.20", +] +files = [ + {file = "contourpy-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0274c1cb63625972c0c007ab14dd9ba9e199c36ae1a231ce45d725cbcbfd10a8"}, + {file = "contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:ab459a1cbbf18e8698399c595a01f6dcc5c138220ca3ea9e7e6126232d102bb4"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fdd887f17c2f4572ce548461e4f96396681212d858cae7bd52ba3310bc6f00f"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d16edfc3fc09968e09ddffada434b3bf989bf4911535e04eada58469873e28e"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c203f617abc0dde5792beb586f827021069fb6d403d7f4d5c2b543d87edceb9"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b69303ceb2e4d4f146bf82fda78891ef7bcd80c41bf16bfca3d0d7eb545448aa"}, + {file = "contourpy-1.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:884c3f9d42d7218304bc74a8a7693d172685c84bd7ab2bab1ee567b769696df9"}, + {file = "contourpy-1.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4a1b1208102be6e851f20066bf0e7a96b7d48a07c9b0cfe6d0d4545c2f6cadab"}, + {file = "contourpy-1.2.0-cp310-cp310-win32.whl", hash = "sha256:34b9071c040d6fe45d9826cbbe3727d20d83f1b6110d219b83eb0e2a01d79488"}, + {file = "contourpy-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:bd2f1ae63998da104f16a8b788f685e55d65760cd1929518fd94cd682bf03e41"}, + {file = "contourpy-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd10c26b4eadae44783c45ad6655220426f971c61d9b239e6f7b16d5cdaaa727"}, + {file = "contourpy-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5c6b28956b7b232ae801406e529ad7b350d3f09a4fde958dfdf3c0520cdde0dd"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebeac59e9e1eb4b84940d076d9f9a6cec0064e241818bcb6e32124cc5c3e377a"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:139d8d2e1c1dd52d78682f505e980f592ba53c9f73bd6be102233e358b401063"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e9dc350fb4c58adc64df3e0703ab076f60aac06e67d48b3848c23647ae4310e"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18fc2b4ed8e4a8fe849d18dce4bd3c7ea637758c6343a1f2bae1e9bd4c9f4686"}, + {file = "contourpy-1.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:16a7380e943a6d52472096cb7ad5264ecee36ed60888e2a3d3814991a0107286"}, + {file = "contourpy-1.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8d8faf05be5ec8e02a4d86f616fc2a0322ff4a4ce26c0f09d9f7fb5330a35c95"}, + {file = "contourpy-1.2.0-cp311-cp311-win32.whl", hash = "sha256:67b7f17679fa62ec82b7e3e611c43a016b887bd64fb933b3ae8638583006c6d6"}, + {file = "contourpy-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:99ad97258985328b4f207a5e777c1b44a83bfe7cf1f87b99f9c11d4ee477c4de"}, + {file = "contourpy-1.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:575bcaf957a25d1194903a10bc9f316c136c19f24e0985a2b9b5608bdf5dbfe0"}, + {file = "contourpy-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9e6c93b5b2dbcedad20a2f18ec22cae47da0d705d454308063421a3b290d9ea4"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:464b423bc2a009088f19bdf1f232299e8b6917963e2b7e1d277da5041f33a779"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:68ce4788b7d93e47f84edd3f1f95acdcd142ae60bc0e5493bfd120683d2d4316"}, + {file = 
"contourpy-1.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d7d1f8871998cdff5d2ff6a087e5e1780139abe2838e85b0b46b7ae6cc25399"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e739530c662a8d6d42c37c2ed52a6f0932c2d4a3e8c1f90692ad0ce1274abe0"}, + {file = "contourpy-1.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:247b9d16535acaa766d03037d8e8fb20866d054d3c7fbf6fd1f993f11fc60ca0"}, + {file = "contourpy-1.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:461e3ae84cd90b30f8d533f07d87c00379644205b1d33a5ea03381edc4b69431"}, + {file = "contourpy-1.2.0-cp312-cp312-win32.whl", hash = "sha256:1c2559d6cffc94890b0529ea7eeecc20d6fadc1539273aa27faf503eb4656d8f"}, + {file = "contourpy-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:491b1917afdd8638a05b611a56d46587d5a632cabead889a5440f7c638bc6ed9"}, + {file = "contourpy-1.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5fd1810973a375ca0e097dee059c407913ba35723b111df75671a1976efa04bc"}, + {file = "contourpy-1.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:999c71939aad2780f003979b25ac5b8f2df651dac7b38fb8ce6c46ba5abe6ae9"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7caf9b241464c404613512d5594a6e2ff0cc9cb5615c9475cc1d9b514218ae8"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:266270c6f6608340f6c9836a0fb9b367be61dde0c9a9a18d5ece97774105ff3e"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbd50d0a0539ae2e96e537553aff6d02c10ed165ef40c65b0e27e744a0f10af8"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11f8d2554e52f459918f7b8e6aa20ec2a3bce35ce95c1f0ef4ba36fbda306df5"}, + {file = "contourpy-1.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ce96dd400486e80ac7d195b2d800b03e3e6a787e2a522bfb83755938465a819e"}, + {file = "contourpy-1.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6d3364b999c62f539cd403f8123ae426da946e142312a514162adb2addd8d808"}, + {file = "contourpy-1.2.0-cp39-cp39-win32.whl", hash = "sha256:1c88dfb9e0c77612febebb6ac69d44a8d81e3dc60f993215425b62c1161353f4"}, + {file = "contourpy-1.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:78e6ad33cf2e2e80c5dfaaa0beec3d61face0fb650557100ee36db808bfa6843"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:be16975d94c320432657ad2402f6760990cb640c161ae6da1363051805fa8108"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b95a225d4948b26a28c08307a60ac00fb8671b14f2047fc5476613252a129776"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0d7e03c0f9a4f90dc18d4e77e9ef4ec7b7bbb437f7f675be8e530d65ae6ef956"}, + {file = "contourpy-1.2.0.tar.gz", hash = "sha256:171f311cb758de7da13fc53af221ae47a5877be5a0843a9fe150818c51ed276a"}, +] + +[[package]] +name = "cycler" +version = "0.12.1" +requires_python = ">=3.8" +summary = "Composable style cycles" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + [[package]] name = "debugpy" version = "1.8.0" @@ -573,6 +673,48 @@ files = [ {file = "fiona-1.9.5.tar.gz", hash = 
"sha256:99e2604332caa7692855c2ae6ed91e1fffdf9b59449aa8032dd18e070e59a2f7"}, ] +[[package]] +name = "fonttools" +version = "4.47.0" +requires_python = ">=3.8" +summary = "Tools to manipulate font files" +files = [ + {file = "fonttools-4.47.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d2404107626f97a221dc1a65b05396d2bb2ce38e435f64f26ed2369f68675d9"}, + {file = "fonttools-4.47.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c01f409be619a9a0f5590389e37ccb58b47264939f0e8d58bfa1f3ba07d22671"}, + {file = "fonttools-4.47.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d986b66ff722ef675b7ee22fbe5947a41f60a61a4da15579d5e276d897fbc7fa"}, + {file = "fonttools-4.47.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8acf6dd0434b211b3bd30d572d9e019831aae17a54016629fa8224783b22df8"}, + {file = "fonttools-4.47.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:495369c660e0c27233e3c572269cbe520f7f4978be675f990f4005937337d391"}, + {file = "fonttools-4.47.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c59227d7ba5b232281c26ae04fac2c73a79ad0e236bca5c44aae904a18f14faf"}, + {file = "fonttools-4.47.0-cp310-cp310-win32.whl", hash = "sha256:59a6c8b71a245800e923cb684a2dc0eac19c56493e2f896218fcf2571ed28984"}, + {file = "fonttools-4.47.0-cp310-cp310-win_amd64.whl", hash = "sha256:52c82df66201f3a90db438d9d7b337c7c98139de598d0728fb99dab9fd0495ca"}, + {file = "fonttools-4.47.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:854421e328d47d70aa5abceacbe8eef231961b162c71cbe7ff3f47e235e2e5c5"}, + {file = "fonttools-4.47.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:511482df31cfea9f697930f61520f6541185fa5eeba2fa760fe72e8eee5af88b"}, + {file = "fonttools-4.47.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0e2c88c8c985b7b9a7efcd06511fb0a1fe3ddd9a6cd2895ef1dbf9059719d7"}, + {file = "fonttools-4.47.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7a0a8848726956e9d9fb18c977a279013daadf0cbb6725d2015a6dd57527992"}, + {file = "fonttools-4.47.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e869da810ae35afb3019baa0d0306cdbab4760a54909c89ad8904fa629991812"}, + {file = "fonttools-4.47.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dd23848f877c3754f53a4903fb7a593ed100924f9b4bff7d5a4e2e8a7001ae11"}, + {file = "fonttools-4.47.0-cp311-cp311-win32.whl", hash = "sha256:bf1810635c00f7c45d93085611c995fc130009cec5abdc35b327156aa191f982"}, + {file = "fonttools-4.47.0-cp311-cp311-win_amd64.whl", hash = "sha256:61df4dee5d38ab65b26da8efd62d859a1eef7a34dcbc331299a28e24d04c59a7"}, + {file = "fonttools-4.47.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e3f4d61f3a8195eac784f1d0c16c0a3105382c1b9a74d99ac4ba421da39a8826"}, + {file = "fonttools-4.47.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:174995f7b057e799355b393e97f4f93ef1f2197cbfa945e988d49b2a09ecbce8"}, + {file = "fonttools-4.47.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea592e6a09b71cb7a7661dd93ac0b877a6228e2d677ebacbad0a4d118494c86d"}, + {file = "fonttools-4.47.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40bdbe90b33897d9cc4a39f8e415b0fcdeae4c40a99374b8a4982f127ff5c767"}, + {file = "fonttools-4.47.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:843509ae9b93db5aaf1a6302085e30bddc1111d31e11d724584818f5b698f500"}, + {file = 
"fonttools-4.47.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9acfa1cdc479e0dde528b61423855913d949a7f7fe09e276228298fef4589540"}, + {file = "fonttools-4.47.0-cp312-cp312-win32.whl", hash = "sha256:66c92ec7f95fd9732550ebedefcd190a8d81beaa97e89d523a0d17198a8bda4d"}, + {file = "fonttools-4.47.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8fa20748de55d0021f83754b371432dca0439e02847962fc4c42a0e444c2d78"}, + {file = "fonttools-4.47.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:62d8ddb058b8e87018e5dc26f3258e2c30daad4c87262dfeb0e2617dd84750e6"}, + {file = "fonttools-4.47.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5dde0eab40faaa5476133123f6a622a1cc3ac9b7af45d65690870620323308b4"}, + {file = "fonttools-4.47.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4da089f6dfdb822293bde576916492cd708c37c2501c3651adde39804630538"}, + {file = "fonttools-4.47.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:253bb46bab970e8aae254cebf2ae3db98a4ef6bd034707aa68a239027d2b198d"}, + {file = "fonttools-4.47.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1193fb090061efa2f9e2d8d743ae9850c77b66746a3b32792324cdce65784154"}, + {file = "fonttools-4.47.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:084511482dd265bce6dca24c509894062f0117e4e6869384d853f46c0e6d43be"}, + {file = "fonttools-4.47.0-cp39-cp39-win32.whl", hash = "sha256:97620c4af36e4c849e52661492e31dc36916df12571cb900d16960ab8e92a980"}, + {file = "fonttools-4.47.0-cp39-cp39-win_amd64.whl", hash = "sha256:e77bdf52185bdaf63d39f3e1ac3212e6cfa3ab07d509b94557a8902ce9c13c82"}, + {file = "fonttools-4.47.0-py3-none-any.whl", hash = "sha256:d6477ba902dd2d7adda7f0fd3bfaeb92885d45993c9e1928c9f28fc3961415f7"}, + {file = "fonttools-4.47.0.tar.gz", hash = "sha256:ec13a10715eef0e031858c1c23bfaee6cba02b97558e4a7bfa089dba4a8c2ebf"}, +] + [[package]] name = "future" version = "0.18.3" @@ -973,6 +1115,90 @@ files = [ {file = "jupytext-1.16.0.tar.gz", hash = "sha256:94c7e67775e90e1792c39ab7fca4e0459bf7c35656123e8dc2e9e1b3e953baf8"}, ] +[[package]] +name = "kiwisolver" +version = "1.4.5" +requires_python = ">=3.7" +summary = "A fast implementation of the Cassowary constraint solver" +files = [ + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ef7afcd2d281494c0a9101d5c571970708ad911d028137cd558f02b851c08b4"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9eaa8b117dc8337728e834b9c6e2611f10c79e38f65157c4c38e9400286f5cb1"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec20916e7b4cbfb1f12380e46486ec4bcbaa91a9c448b97023fde0d5bbf9e4ff"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b42c68602539407884cf70d6a480a469b93b81b7701378ba5e2328660c847a"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa12042de0171fad672b6c59df69106d20d5596e4f87b5e8f76df757a7c399aa"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a40773c71d7ccdd3798f6489aaac9eee213d566850a9533f8d26332d626b82c"}, + {file = 
"kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:19df6e621f6d8b4b9c4d45f40a66839294ff2bb235e64d2178f7522d9170ac5b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:83d78376d0d4fd884e2c114d0621624b73d2aba4e2788182d286309ebdeed770"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e391b1f0a8a5a10ab3b9bb6afcfd74f2175f24f8975fb87ecae700d1503cdee0"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:852542f9481f4a62dbb5dd99e8ab7aedfeb8fb6342349a181d4036877410f525"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59edc41b24031bc25108e210c0def6f6c2191210492a972d585a06ff246bb79b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win32.whl", hash = "sha256:a6aa6315319a052b4ee378aa171959c898a6183f15c1e541821c5c59beaa0238"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win_amd64.whl", hash = "sha256:d0ef46024e6a3d79c01ff13801cb19d0cad7fd859b15037aec74315540acc276"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:11863aa14a51fd6ec28688d76f1735f8f69ab1fabf388851a595d0721af042f5"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ab3919a9997ab7ef2fbbed0cc99bb28d3c13e6d4b1ad36e97e482558a91be90"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fcc700eadbbccbf6bc1bcb9dbe0786b4b1cb91ca0dcda336eef5c2beed37b797"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfdd7c0b105af050eb3d64997809dc21da247cf44e63dc73ff0fd20b96be55a9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76c6a5964640638cdeaa0c359382e5703e9293030fe730018ca06bc2010c4437"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbea0db94288e29afcc4c28afbf3a7ccaf2d7e027489c449cf7e8f83c6346eb9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ceec1a6bc6cab1d6ff5d06592a91a692f90ec7505d6463a88a52cc0eb58545da"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040c1aebeda72197ef477a906782b5ab0d387642e93bda547336b8957c61022e"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f91de7223d4c7b793867797bacd1ee53bfe7359bd70d27b7b58a04efbb9436c8"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:faae4860798c31530dd184046a900e652c95513796ef51a12bc086710c2eec4d"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0157420efcb803e71d1b28e2c287518b8808b7cf1ab8af36718fd0a2c453eb0"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:06f54715b7737c2fecdbf140d1afb11a33d59508a47bf11bb38ecf21dc9ab79f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fdb7adb641a0d13bdcd4ef48e062363d8a9ad4a182ac7647ec88f695e719ae9f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win32.whl", hash = "sha256:bb86433b1cfe686da83ce32a9d3a8dd308e85c76b60896d58f082136f10bffac"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c08e1312a9cf1074d17b17728d3dfce2a5125b2d791527f33ffbe805200a355"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:32d5cf40c4f7c7b3ca500f8985eb3fb3a7dfc023215e876f207956b5ea26632a"}, + {file = 
"kiwisolver-1.4.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f846c260f483d1fd217fe5ed7c173fb109efa6b1fc8381c8b7552c5781756192"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5ff5cf3571589b6d13bfbfd6bcd7a3f659e42f96b5fd1c4830c4cf21d4f5ef45"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7269d9e5f1084a653d575c7ec012ff57f0c042258bf5db0954bf551c158466e7"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da802a19d6e15dffe4b0c24b38b3af68e6c1a68e6e1d8f30148c83864f3881db"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3aba7311af82e335dd1e36ffff68aaca609ca6290c2cb6d821a39aa075d8e3ff"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763773d53f07244148ccac5b084da5adb90bfaee39c197554f01b286cf869228"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2270953c0d8cdab5d422bee7d2007f043473f9d2999631c86a223c9db56cbd16"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d099e745a512f7e3bbe7249ca835f4d357c586d78d79ae8f1dcd4d8adeb9bda9"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:74db36e14a7d1ce0986fa104f7d5637aea5c82ca6326ed0ec5694280942d1162"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e5bab140c309cb3a6ce373a9e71eb7e4873c70c2dda01df6820474f9889d6d4"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0f114aa76dc1b8f636d077979c0ac22e7cd8f3493abbab152f20eb8d3cda71f3"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:88a2df29d4724b9237fc0c6eaf2a1adae0cdc0b3e9f4d8e7dc54b16812d2d81a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win32.whl", hash = "sha256:72d40b33e834371fd330fb1472ca19d9b8327acb79a5821d4008391db8e29f20"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:2c5674c4e74d939b9d91dda0fae10597ac7521768fec9e399c70a1f27e2ea2d9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9407b6a5f0d675e8a827ad8742e1d6b49d9c1a1da5d952a67d50ef5f4170b18d"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15568384086b6df3c65353820a4473575dbad192e35010f622c6ce3eebd57af9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0dc9db8e79f0036e8173c466d21ef18e1befc02de8bf8aa8dc0813a6dc8a7046"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cdc8a402aaee9a798b50d8b827d7ecf75edc5fb35ea0f91f213ff927c15f4ff0"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:955e8513d07a283056b1396e9a57ceddbd272d9252c14f154d450d227606eb54"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:346f5343b9e3f00b8db8ba359350eb124b98c99efd0b408728ac6ebf38173958"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9098e0049e88c6a24ff64545cdfc50807818ba6c1b739cae221bbbcbc58aad3"}, + {file = 
"kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:00bd361b903dc4bbf4eb165f24d1acbee754fce22ded24c3d56eec268658a5cf"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7b8b454bac16428b22560d0a1cf0a09875339cab69df61d7805bf48919415901"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f1d072c2eb0ad60d4c183f3fb44ac6f73fb7a8f16a2694a91f988275cbf352f9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:31a82d498054cac9f6d0b53d02bb85811185bcb477d4b60144f915f3b3126342"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6512cb89e334e4700febbffaaa52761b65b4f5a3cf33f960213d5656cea36a77"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win32.whl", hash = "sha256:9db8ea4c388fdb0f780fe91346fd438657ea602d58348753d9fb265ce1bca67f"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win_amd64.whl", hash = "sha256:59415f46a37f7f2efeec758353dd2eae1b07640d8ca0f0c42548ec4125492635"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5c7b3b3a728dc6faf3fc372ef24f21d1e3cee2ac3e9596691d746e5a536de920"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:620ced262a86244e2be10a676b646f29c34537d0d9cc8eb26c08f53d98013390"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:378a214a1e3bbf5ac4a8708304318b4f890da88c9e6a07699c4ae7174c09a68d"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf7be1207676ac608a50cd08f102f6742dbfc70e8d60c4db1c6897f62f71523"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ba55dce0a9b8ff59495ddd050a0225d58bd0983d09f87cfe2b6aec4f2c1234e4"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd32ea360bcbb92d28933fc05ed09bffcb1704ba3fc7942e81db0fd4f81a7892"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7139af55d1688f8b960ee9ad5adafc4ac17c1c473fe07133ac092310d76544"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dced8146011d2bc2e883f9bd68618b8247387f4bbec46d7392b3c3b032640126"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9bf3325c47b11b2e51bca0824ea217c7cd84491d8ac4eefd1e409705ef092bd"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5794cf59533bc3f1b1c821f7206a3617999db9fbefc345360aafe2e067514929"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e368f200bbc2e4f905b8e71eb38b3c04333bddaa6a2464a6355487b02bb7fb09"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d706eba36b4c4d5bc6c6377bb6568098765e990cfc21ee16d13963fab7b3e7"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85267bd1aa8880a9c88a8cb71e18d3d64d2751a790e6ca6c27b8ccc724bcd5ad"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210ef2c3a1f03272649aff1ef992df2e724748918c4bc2d5a90352849eb40bea"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:11d011a7574eb3b82bcc9c1a1d35c1d7075677fdd15de527d91b46bd35e935ee"}, + {file = "kiwisolver-1.4.5.tar.gz", 
hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, +] + [[package]] name = "lexid" version = "2021.1006" @@ -1085,6 +1311,54 @@ files = [ {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] +[[package]] +name = "matplotlib" +version = "3.8.2" +requires_python = ">=3.9" +summary = "Python plotting package" +dependencies = [ + "contourpy>=1.0.1", + "cycler>=0.10", + "fonttools>=4.22.0", + "importlib-resources>=3.2.0; python_version < \"3.10\"", + "kiwisolver>=1.3.1", + "numpy<2,>=1.21", + "packaging>=20.0", + "pillow>=8", + "pyparsing>=2.3.1", + "python-dateutil>=2.7", +] +files = [ + {file = "matplotlib-3.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:09796f89fb71a0c0e1e2f4bdaf63fb2cefc84446bb963ecdeb40dfee7dfa98c7"}, + {file = "matplotlib-3.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9c6976748a25e8b9be51ea028df49b8e561eed7809146da7a47dbecebab367"}, + {file = "matplotlib-3.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b78e4f2cedf303869b782071b55fdde5987fda3038e9d09e58c91cc261b5ad18"}, + {file = "matplotlib-3.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e208f46cf6576a7624195aa047cb344a7f802e113bb1a06cfd4bee431de5e31"}, + {file = "matplotlib-3.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:46a569130ff53798ea5f50afce7406e91fdc471ca1e0e26ba976a8c734c9427a"}, + {file = "matplotlib-3.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:830f00640c965c5b7f6bc32f0d4ce0c36dfe0379f7dd65b07a00c801713ec40a"}, + {file = "matplotlib-3.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d86593ccf546223eb75a39b44c32788e6f6440d13cfc4750c1c15d0fcb850b63"}, + {file = "matplotlib-3.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9a5430836811b7652991939012f43d2808a2db9b64ee240387e8c43e2e5578c8"}, + {file = "matplotlib-3.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9576723858a78751d5aacd2497b8aef29ffea6d1c95981505877f7ac28215c6"}, + {file = "matplotlib-3.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ba9cbd8ac6cf422f3102622b20f8552d601bf8837e49a3afed188d560152788"}, + {file = "matplotlib-3.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:03f9d160a29e0b65c0790bb07f4f45d6a181b1ac33eb1bb0dd225986450148f0"}, + {file = "matplotlib-3.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:3773002da767f0a9323ba1a9b9b5d00d6257dbd2a93107233167cfb581f64717"}, + {file = "matplotlib-3.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4c318c1e95e2f5926fba326f68177dee364aa791d6df022ceb91b8221bd0a627"}, + {file = "matplotlib-3.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:091275d18d942cf1ee9609c830a1bc36610607d8223b1b981c37d5c9fc3e46a4"}, + {file = "matplotlib-3.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b0f3b8ea0e99e233a4bcc44590f01604840d833c280ebb8fe5554fd3e6cfe8d"}, + {file = "matplotlib-3.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7b1704a530395aaf73912be741c04d181f82ca78084fbd80bc737be04848331"}, + {file = "matplotlib-3.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533b0e3b0c6768eef8cbe4b583731ce25a91ab54a22f830db2b031e83cca9213"}, + {file = "matplotlib-3.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:0f4fc5d72b75e2c18e55eb32292659cf731d9d5b312a6eb036506304f4675630"}, + {file = "matplotlib-3.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = 
"sha256:deaed9ad4da0b1aea77fe0aa0cebb9ef611c70b3177be936a95e5d01fa05094f"}, + {file = "matplotlib-3.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:172f4d0fbac3383d39164c6caafd3255ce6fa58f08fc392513a0b1d3b89c4f89"}, + {file = "matplotlib-3.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7d36c2209d9136cd8e02fab1c0ddc185ce79bc914c45054a9f514e44c787917"}, + {file = "matplotlib-3.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5864bdd7da445e4e5e011b199bb67168cdad10b501750367c496420f2ad00843"}, + {file = "matplotlib-3.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ef8345b48e95cee45ff25192ed1f4857273117917a4dcd48e3905619bcd9c9b8"}, + {file = "matplotlib-3.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:7c48d9e221b637c017232e3760ed30b4e8d5dfd081daf327e829bf2a72c731b4"}, + {file = "matplotlib-3.8.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:aa11b3c6928a1e496c1a79917d51d4cd5d04f8a2e75f21df4949eeefdf697f4b"}, + {file = "matplotlib-3.8.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1095fecf99eeb7384dabad4bf44b965f929a5f6079654b681193edf7169ec20"}, + {file = "matplotlib-3.8.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:bddfb1db89bfaa855912261c805bd0e10218923cc262b9159a49c29a7a1c1afa"}, + {file = "matplotlib-3.8.2.tar.gz", hash = "sha256:01a978b871b881ee76017152f1f1a0cbf6bd5f7b8ff8c96df0df1bd57d8755a1"}, +] + [[package]] name = "matplotlib-inline" version = "0.1.6" @@ -1472,6 +1746,16 @@ files = [ {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"}, ] +[[package]] +name = "networkx" +version = "3.2.1" +requires_python = ">=3.9" +summary = "Python package for creating and manipulating graphs and networks" +files = [ + {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, + {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, +] + [[package]] name = "nodeenv" version = "1.8.0" @@ -1529,6 +1813,24 @@ files = [ {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"}, ] +[[package]] +name = "osmnx" +version = "1.8.0" +requires_python = ">=3.8" +summary = "Download, model, analyze, and visualize street networks and other geospatial features from OpenStreetMap" +dependencies = [ + "geopandas>=0.12", + "networkx>=2.5", + "numpy>=1.20", + "pandas>=1.1", + "requests>=2.27", + "shapely>=2.0", +] +files = [ + {file = "osmnx-1.8.0-py3-none-any.whl", hash = "sha256:b1a7c29feb8a4360f7e0b5044798f3b186879ac0a0e4b9f0ad9cff97cfd01dac"}, + {file = "osmnx-1.8.0.tar.gz", hash = "sha256:1d6ec9c1114cce2572c4b88e7b2dfb8d0ca5c6591b08e45301dc7f32967c76cf"}, +] + [[package]] name = "packaging" version = "23.2" @@ -1630,6 +1932,59 @@ files = [ {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, ] +[[package]] +name = "pillow" +version = "10.1.0" +requires_python = ">=3.8" +summary = "Python Imaging Library (Fork)" +files = [ + {file = "Pillow-10.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1ab05f3db77e98f93964697c8efc49c7954b08dd61cff526b7f2531a22410106"}, + {file = "Pillow-10.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6932a7652464746fcb484f7fc3618e6503d2066d853f68a4bd97193a3996e273"}, + {file = 
"Pillow-10.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f63b5a68daedc54c7c3464508d8c12075e56dcfbd42f8c1bf40169061ae666"}, + {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0949b55eb607898e28eaccb525ab104b2d86542a85c74baf3a6dc24002edec2"}, + {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ae88931f93214777c7a3aa0a8f92a683f83ecde27f65a45f95f22d289a69e593"}, + {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b0eb01ca85b2361b09480784a7931fc648ed8b7836f01fb9241141b968feb1db"}, + {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d27b5997bdd2eb9fb199982bb7eb6164db0426904020dc38c10203187ae2ff2f"}, + {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7df5608bc38bd37ef585ae9c38c9cd46d7c81498f086915b0f97255ea60c2818"}, + {file = "Pillow-10.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:41f67248d92a5e0a2076d3517d8d4b1e41a97e2df10eb8f93106c89107f38b57"}, + {file = "Pillow-10.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1fb29c07478e6c06a46b867e43b0bcdb241b44cc52be9bc25ce5944eed4648e7"}, + {file = "Pillow-10.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2cdc65a46e74514ce742c2013cd4a2d12e8553e3a2563c64879f7c7e4d28bce7"}, + {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50d08cd0a2ecd2a8657bd3d82c71efd5a58edb04d9308185d66c3a5a5bed9610"}, + {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062a1610e3bc258bff2328ec43f34244fcec972ee0717200cb1425214fe5b839"}, + {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:61f1a9d247317fa08a308daaa8ee7b3f760ab1809ca2da14ecc88ae4257d6172"}, + {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a646e48de237d860c36e0db37ecaecaa3619e6f3e9d5319e527ccbc8151df061"}, + {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:47e5bf85b80abc03be7455c95b6d6e4896a62f6541c1f2ce77a7d2bb832af262"}, + {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a92386125e9ee90381c3369f57a2a50fa9e6aa8b1cf1d9c4b200d41a7dd8e992"}, + {file = "Pillow-10.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f7c276c05a9767e877a0b4c5050c8bee6a6d960d7f0c11ebda6b99746068c2a"}, + {file = "Pillow-10.1.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:a89b8312d51715b510a4fe9fc13686283f376cfd5abca8cd1c65e4c76e21081b"}, + {file = "Pillow-10.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:00f438bb841382b15d7deb9a05cc946ee0f2c352653c7aa659e75e592f6fa17d"}, + {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d929a19f5469b3f4df33a3df2983db070ebb2088a1e145e18facbc28cae5b27"}, + {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a92109192b360634a4489c0c756364c0c3a2992906752165ecb50544c251312"}, + {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:0248f86b3ea061e67817c47ecbe82c23f9dd5d5226200eb9090b3873d3ca32de"}, + {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9882a7451c680c12f232a422730f986a1fcd808da0fd428f08b671237237d651"}, + {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1c3ac5423c8c1da5928aa12c6e258921956757d976405e9467c5f39d1d577a4b"}, + {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", 
hash = "sha256:806abdd8249ba3953c33742506fe414880bad78ac25cc9a9b1c6ae97bedd573f"}, + {file = "Pillow-10.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:eaed6977fa73408b7b8a24e8b14e59e1668cfc0f4c40193ea7ced8e210adf996"}, + {file = "Pillow-10.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0a026c188be3b443916179f5d04548092e253beb0c3e2ee0a4e2cdad72f66099"}, + {file = "Pillow-10.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04f6f6149f266a100374ca3cc368b67fb27c4af9f1cc8cb6306d849dcdf12616"}, + {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb40c011447712d2e19cc261c82655f75f32cb724788df315ed992a4d65696bb"}, + {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a8413794b4ad9719346cd9306118450b7b00d9a15846451549314a58ac42219"}, + {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c9aeea7b63edb7884b031a35305629a7593272b54f429a9869a4f63a1bf04c34"}, + {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b4005fee46ed9be0b8fb42be0c20e79411533d1fd58edabebc0dd24626882cfd"}, + {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0152565c6aa6ebbfb1e5d8624140a440f2b99bf7afaafbdbf6430426497f28"}, + {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d921bc90b1defa55c9917ca6b6b71430e4286fc9e44c55ead78ca1a9f9eba5f2"}, + {file = "Pillow-10.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfe96560c6ce2f4c07d6647af2d0f3c54cc33289894ebd88cfbb3bcd5391e256"}, + {file = "Pillow-10.1.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:937bdc5a7f5343d1c97dc98149a0be7eb9704e937fe3dc7140e229ae4fc572a7"}, + {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1c25762197144e211efb5f4e8ad656f36c8d214d390585d1d21281f46d556ba"}, + {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:afc8eef765d948543a4775f00b7b8c079b3321d6b675dde0d02afa2ee23000b4"}, + {file = "Pillow-10.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:883f216eac8712b83a63f41b76ddfb7b2afab1b74abbb413c5df6680f071a6b9"}, + {file = "Pillow-10.1.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b920e4d028f6442bea9a75b7491c063f0b9a3972520731ed26c83e254302eb1e"}, + {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c41d960babf951e01a49c9746f92c5a7e0d939d1652d7ba30f6b3090f27e412"}, + {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1fafabe50a6977ac70dfe829b2d5735fd54e190ab55259ec8aea4aaea412fa0b"}, + {file = "Pillow-10.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b834f4b16173e5b92ab6566f0473bfb09f939ba14b23b8da1f54fa63e4b623f"}, + {file = "Pillow-10.1.0.tar.gz", hash = "sha256:e6bf8de6c36ed96c86ea3b6e1d5273c53f46ef518a062464cd7ef5dd2cf92e38"}, +] + [[package]] name = "platformdirs" version = "4.1.0" @@ -2451,7 +2806,7 @@ files = [ [[package]] name = "srai" -version = "0.6.1" +version = "0.6.2" requires_python = ">=3.9" summary = "A set of python modules for geospatial machine learning and data mining" dependencies = [ @@ -2472,8 +2827,8 @@ dependencies = [ "typeguard", ] files = [ - {file = "srai-0.6.1-py3-none-any.whl", hash = "sha256:9d256c4b035d958f34fed133a85063adc6f61e4fc92654334ac936fe5ccf5342"}, - {file = "srai-0.6.1.tar.gz", hash = "sha256:794a729334b4c369cc10b936c110e97a0b43cd00fd8f145ed452301e89524784"}, + {file = "srai-0.6.2-py3-none-any.whl", hash = 
"sha256:4b65ee965f007b06fd807d50dfbb254d2d003b11d9f8e681aa5133aa76205b41"}, + {file = "srai-0.6.2.tar.gz", hash = "sha256:58cd92bf8bfb6bcc10a133ba60eeae53cab57b412dfb38227b0925b76751ba4a"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 61d1624..caca731 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,15 +2,14 @@ name = "QuackOSM" version = "0.1.0" description = "An open-source tool for reading OpenStreetMap PBF files using DuckDB" -authors = [ - { name = "Kamil Raczycki", email = "kraczycki@kraina.ai" }, -] +authors = [{ name = "Kamil Raczycki", email = "kraczycki@kraina.ai" }] dependencies = [ "geopandas", "shapely", "pyarrow>=13.0.0", "duckdb>=0.9.2", - "geoarrow-pyarrow @ git+https://github.com/geoarrow/geoarrow-python.git@0a95d5ff3180c20e89f4572ac38d255f58e13396#subdirectory=geoarrow-pyarrow" + "geoarrow-pyarrow @ git+https://github.com/geoarrow/geoarrow-python.git@0a95d5ff3180c20e89f4572ac38d255f58e13396#subdirectory=geoarrow-pyarrow", + "typeguard", ] requires-python = ">=3.9" readme = "README.md" @@ -52,7 +51,7 @@ test = [ "pytest-parametrization", "pytest-xdist", "pyogrio", - "srai" + "srai>=0.6.2", ] # pdm add -dG docs docs = [ @@ -65,8 +64,94 @@ docs = [ "mkdocs-gen-files", "mkdocs-awesome-pages-plugin", "mike", + "black", + "osmnx", + "matplotlib", ] license = ["licensecheck"] [tool.pdm.scripts] post_install = "pre-commit install" + +[tool.black] +line-length = 100 +target-version = ["py39", "py310", "py311"] +preview = true + +[tool.ruff] +line-length = 100 +target-version = "py39" +select = [ + "E", + "W", # pycodestyle + "F", # pyflakes + "UP", # pyupgrade + "D", # pydocstyle + "I", # isort + "B", # flake8-bugbear + "NPY", # NumPy + "YTT", # flake8-2020 + "Q", # flake8-quotes + "PLE", + "PLW", # pylint (add "PLR" in the future) + "PIE", # misc lints + "TID", # tidy imports + "ISC", # implicit string concatenation + "TCH", # type-checking imports + # "N", # pep8-naming + # "ANN", # flake8-annotations +] +ignore = ["D212"] +extend-exclude = ["old"] + +[tool.ruff.pydocstyle] +convention = "google" + +[tool.ruff.pycodestyle] +max-doc-length = 100 + +[tool.mypy] +strict = true +show_column_numbers = true +disallow_untyped_defs = true +no_implicit_optional = true +check_untyped_defs = true +warn_return_any = true + +[tool.docformatter] +syntax = 'google' +black = true +recursive = true +wrap-summaries = 100 +wrap-descriptions = 100 +# force-wrap = false # uncomment after https://github.com/PyCQA/docformatter/issues/68 is resolved +tab-width = 4 +blank = false +pre-summary-newline = true +close-quotes-on-newline = true +wrap-one-line = true + +[tool.bumpver] +current_version = "0.1.0" +version_pattern = "MAJOR.MINOR.PATCH[PYTAGNUM]" +commit_message = "chore(CI/CD): bump version {old_version} -> {new_version}" +commit = true +tag = false +push = false + +[tool.bumpver.file_patterns] +"pyproject.toml" = [ + '^current_version = "{version}"$', + '^version = "{version}"$', +] +"quackosm/__init__.py" = ['^__version__ = "{version}"$'] + +[tool.pytest.ini_options] +addopts = ["--import-mode=importlib"] +markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"] +log_cli = true + +[tool.licensecheck] +using = "requirements" +zero = false +ignore_licenses = ["UNKNOWN"] diff --git a/quackosm/__init__.py b/quackosm/__init__.py index 098058f..87cb384 100644 --- a/quackosm/__init__.py +++ b/quackosm/__init__.py @@ -1,7 +1,17 @@ -"""QuackOSM. +""" +QuackOSM. 
-QuackOSM is a Python library used for reading pbf (ProtoBuffer) files -with OpenStreetMap data using DuckDB spatial extension without GDAL. +QuackOSM is a Python library used for reading pbf (ProtoBuffer) files with OpenStreetMap data using +DuckDB spatial extension without GDAL. """ +from quackosm.functions import convert_pbf_to_gpq, get_features_gdf +from quackosm.pbf_file_reader import PbfFileReader + __version__ = "0.1.0" + +__all__ = [ + "PbfFileReader", + "convert_pbf_to_gpq", + "get_features_gdf", +] diff --git a/quackosm/_constants.py b/quackosm/_constants.py new file mode 100644 index 0000000..0e9bc6e --- /dev/null +++ b/quackosm/_constants.py @@ -0,0 +1,7 @@ +"""Constants used across the project.""" + +WGS84_CRS = "EPSG:4326" + +FEATURES_INDEX = "feature_id" + +GEOMETRY_COLUMN = "geometry" diff --git a/quackosm/_osm_tags_filters.py b/quackosm/_osm_tags_filters.py new file mode 100644 index 0000000..6da8fba --- /dev/null +++ b/quackosm/_osm_tags_filters.py @@ -0,0 +1,130 @@ +"""Module contains a dedicated type alias for OSM tags filter.""" + +from collections.abc import Iterable +from typing import Union, cast, overload + +from quackosm._typing import is_expected_type + +OsmTagsFilter = dict[str, Union[list[str], str, bool]] + +GroupedOsmTagsFilter = dict[str, OsmTagsFilter] + + +@overload +def merge_osm_tags_filter(osm_tags_filter: OsmTagsFilter) -> OsmTagsFilter: ... + + +@overload +def merge_osm_tags_filter(osm_tags_filter: GroupedOsmTagsFilter) -> OsmTagsFilter: ... + + +@overload +def merge_osm_tags_filter(osm_tags_filter: Iterable[OsmTagsFilter]) -> OsmTagsFilter: ... + + +@overload +def merge_osm_tags_filter(osm_tags_filter: Iterable[GroupedOsmTagsFilter]) -> OsmTagsFilter: ... + + +def merge_osm_tags_filter( + osm_tags_filter: Union[ + OsmTagsFilter, GroupedOsmTagsFilter, Iterable[OsmTagsFilter], Iterable[GroupedOsmTagsFilter] + ] +) -> OsmTagsFilter: + """ + Merge OSM tags filter into `OsmTagsFilter` type. + + Optionally merges `GroupedOsmTagsFilter` into `OsmTagsFilter` to allow loaders to load all + defined groups during single operation. + + Args: + osm_tags_filter: OSM tags filter definition. + + Raises: + AttributeError: When provided tags don't match both + `OsmTagsFilter` or `GroupedOsmTagsFilter`. + + Returns: + OsmTagsFilter: Merged filters. + """ + if is_expected_type(osm_tags_filter, OsmTagsFilter): + return cast(OsmTagsFilter, osm_tags_filter) + elif is_expected_type(osm_tags_filter, GroupedOsmTagsFilter): + return _merge_grouped_osm_tags_filter(cast(GroupedOsmTagsFilter, osm_tags_filter)) + elif is_expected_type(osm_tags_filter, Iterable): + return _merge_multiple_osm_tags_filters( + [ + merge_osm_tags_filter( + cast(Union[OsmTagsFilter, GroupedOsmTagsFilter], sub_osm_tags_filter) + ) + for sub_osm_tags_filter in osm_tags_filter + ] + ) + + raise AttributeError( + "Provided tags don't match required type definitions" + " (OsmTagsFilter or GroupedOsmTagsFilter)." + ) + + +def _merge_grouped_osm_tags_filter(grouped_filter: GroupedOsmTagsFilter) -> OsmTagsFilter: + """ + Merge grouped osm tags filter into a base one. + + Function merges all filter categories into a single one for an OSM loader to use. + + Args: + grouped_filter (GroupedOsmTagsFilter): Grouped filter to be merged into a single one. + + Returns: + osm_tags_type: Merged filter. + """ + if not is_expected_type(grouped_filter, GroupedOsmTagsFilter): + raise ValueError( + "Provided filter doesn't match required `GroupedOsmTagsFilter` definition." 
+ ) + + return _merge_multiple_osm_tags_filters(grouped_filter.values()) + + +def _merge_multiple_osm_tags_filters(osm_tags_filters: Iterable[OsmTagsFilter]) -> OsmTagsFilter: + """ + Merge multiple OSM tags filters into a single one. + + Function merges all OsmTagsFilters into a single one for an OSM loader to use. + + Args: + osm_tags_filters (Iterable[OsmTagsFilter]): List of filters to be merged into a single one. + + Returns: + osm_tags_type: Merged filter. + """ + if not is_expected_type(osm_tags_filters, Iterable[OsmTagsFilter]): + raise ValueError( + "Provided filter doesn't match required `Iterable[OsmTagsFilter]` definition." + ) + + result: OsmTagsFilter = {} + for osm_tags_filter in osm_tags_filters: + for osm_tag_key, osm_tag_value in osm_tags_filter.items(): + if osm_tag_key not in result: + result[osm_tag_key] = [] + + # If filter is already a positive boolean, skip + if isinstance(result[osm_tag_key], bool) and result[osm_tag_key]: + continue + + current_values_list = cast(list[str], result[osm_tag_key]) + + # Check bool + if isinstance(osm_tag_value, bool) and osm_tag_value: + result[osm_tag_key] = True + # Check string + elif isinstance(osm_tag_value, str) and osm_tag_value not in current_values_list: + current_values_list.append(osm_tag_value) + # Check list + elif isinstance(osm_tag_value, list): + new_values = [value for value in osm_tag_value if value not in current_values_list] + current_values_list.extend(new_values) + + return result diff --git a/quackosm/_osm_way_polygon_features.py b/quackosm/_osm_way_polygon_features.py new file mode 100644 index 0000000..f95a38b --- /dev/null +++ b/quackosm/_osm_way_polygon_features.py @@ -0,0 +1,127 @@ +from collections.abc import Iterable +from typing import Any, NamedTuple, cast + +from quackosm._typing import is_expected_type + + +class OsmWayPolygonConfig(NamedTuple): + """OSM Way polygon features config object.""" + + all: Iterable[str] + allowlist: dict[str, Iterable[str]] + denylist: dict[str, Iterable[str]] + + +def parse_dict_to_config_object(raw_config: dict[str, Any]) -> OsmWayPolygonConfig: + all_tags = raw_config.get("all", []) + allowlist_tags = raw_config.get("allowlist", {}) + denylist_tags = raw_config.get("denylist", {}) + if not is_expected_type(all_tags, Iterable[str]): + raise ValueError(f"Wrong type of key: all ({type(all_tags)})") + + if not is_expected_type(allowlist_tags, dict[str, Iterable[str]]): + raise ValueError(f"Wrong type of key: allowlist ({type(allowlist_tags)})") + + if not is_expected_type(denylist_tags, dict[str, Iterable[str]]): + raise ValueError(f"Wrong type of key: denylist ({type(denylist_tags)})") + + return OsmWayPolygonConfig( + all=cast(Iterable[str], all_tags), + allowlist=cast(dict[str, Iterable[str]], allowlist_tags), + denylist=cast(dict[str, Iterable[str]], denylist_tags), + ) + + +# Config based on two sources + manual OSM wiki check +# 1. https://github.com/tyrasd/osm-polygon-features/blob/master/polygon-features.json +# 2.
https://github.com/ideditor/id-area-keys/blob/main/areaKeys.json +OSM_WAY_POLYGON_CONFIG_RAW = { + "all": [ + "allotments", + "area:highway", + "boundary", + "bridge:support", + "building:part", + "building", + "cemetery", + "club", + "craft", + "demolished:building", + "disused:amenity", + "disused:leisure", + "disused:shop", + "healthcare", + "historic", + "industrial", + "internet_access", + "junction", + "landuse", + "leisure", + "office", + "place", + "police", + "polling_station", + "public_transport", + "residential", + "ruins", + "seamark:type", + "shop", + "sport", + "telecom", + "tourism", + ], + "allowlist": { + "advertising": ["sculpture", "sign"], + "aerialway": ["station"], + "barrier": ["city_wall", "hedge", "wall", "toll_booth"], + "highway": ["services", "rest_area", "platform"], + "railway": ["station", "turntable", "roundhouse", "platform"], + "waterway": ["riverbank", "dock", "boatyard", "dam", "fuel"], + }, + "denylist": { + "aeroway": ["jet_bridge", "parking_position", "taxiway", "no"], + "amenity": ["bench", "weighbridge"], + "attraction": ["river_rafting", "train", "water_slide", "boat_ride"], + "emergency": ["designated", "destination", "no", "official", "private", "yes"], + "geological": ["volcanic_caldera_rim", "fault"], + "golf": ["cartpath", "hole", "path"], + "indoor": ["corridor", "wall"], + "man_made": [ + "yes", + "breakwater", + "carpet_hanger", + "crane", + "cutline", + "dyke", + "embankment", + "goods_conveyor", + "groyne", + "pier", + "pipeline", + "torii", + "video_wall", + ], + "military": ["trench"], + "natural": [ + "bay", + "cliff", + "coastline", + "ridge", + "strait", + "tree_row", + "valley", + "no", + "arete", + ], + "piste:type": ["downhill", "hike", "ice_skate", "nordic", "skitour", "sled", "sleigh"], + "playground": [ + "balancebeam", + "rope_traverse", + "stepping_stone", + "stepping_post", + "rope_swing", + "climbing_slope", + ], + "power": ["cable", "line", "minor_line", "insulator", "busbar", "bay", "portal"], + }, +} diff --git a/quackosm/_typing.py b/quackosm/_typing.py new file mode 100644 index 0000000..2759fd2 --- /dev/null +++ b/quackosm/_typing.py @@ -0,0 +1,28 @@ +"""Utility function for typing purposes.""" + +from contextlib import suppress +from typing import Any + +from typeguard import TypeCheckError, check_type + + +def is_expected_type(value: object, expected_type: Any) -> bool: + """ + Check if an object is a given type. + + Uses `typeguard` library to check objects using `typing` definitions. + + Args: + value (object): Value to be checked against `expected_type`. + expected_type (Any): A class or generic type instance. + + Returns: + bool: Flag whether the object is an instance of the required type. + """ + result = False + + with suppress(TypeCheckError): + check_type(value, expected_type) + result = True + + return result diff --git a/quackosm/functions.py b/quackosm/functions.py new file mode 100644 index 0000000..c4ec67f --- /dev/null +++ b/quackosm/functions.py @@ -0,0 +1,118 @@ +""" +Functions. + +This module contains helper functions to simplify the usage. 
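 + + Example: a minimal usage sketch (the `monaco.osm.pbf` file name below is a placeholder for any locally available PBF extract, not a file shipped with the project): + + >>> from quackosm import convert_pbf_to_gpq, get_features_gdf + >>> gpq_path = convert_pbf_to_gpq("monaco.osm.pbf", tags_filter={"building": True}) + >>> buildings_gdf = get_features_gdf("monaco.osm.pbf", tags_filter={"building": True})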
+""" + +from collections.abc import Iterable +from pathlib import Path +from typing import Optional, Union + +import geopandas as gpd +from shapely.geometry.base import BaseGeometry + +from quackosm._osm_tags_filters import GroupedOsmTagsFilter, OsmTagsFilter +from quackosm.pbf_file_reader import PbfFileReader + + +def convert_pbf_to_gpq( + pbf_path: Union[str, Path], + tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None, + geometry_filter: Optional[BaseGeometry] = None, + result_file_path: Optional[Union[str, Path]] = None, + explode_tags: Optional[bool] = None, + ignore_cache: bool = False, + filter_osm_ids: Optional[list[str]] = None, +) -> Path: + """ + Convert PBF file to GeoParquet file. + + Args: + pbf_path (Union[str, Path]): Pbf file to be parsed to GeoParquet. + tags_filter (Union[OsmTagsFilter, GroupedOsmTagsFilter], optional): A dictionary + specifying which tags to download. + The keys should be OSM tags (e.g. `building`, `amenity`). + The values should either be `True` for retrieving all objects with the tag, + string for retrieving a single tag-value pair + or list of strings for retrieving all values specified in the list. + `tags={'leisure': 'park}` would return parks from the area. + `tags={'leisure': 'park, 'amenity': True, 'shop': ['bakery', 'bicycle']}` + would return parks, all amenity types, bakeries and bicycle shops. + If `None`, handler will allow all of the tags to be parsed. Defaults to `None`. + geometry_filter (BaseGeometry, optional): Region which can be used to filter only + intersecting OSM objects. Defaults to `None`. + result_file_path (Union[str, Path], optional): Where to save + the geoparquet file. If not provided, will be generated based on hashes + from provided tags filter and geometry filter. Defaults to `None`. + explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. + If `None`, will be set based on tags_filter parameter. If no tags filter is provided, + then explode_tags will set to `False`, if there is tags filter it will set to `True`. + Defaults to `None`. + ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not. + Defaults to False. + filter_osm_ids: (list[str], optional): List of OSM features ids to read from the file. + Have to be in the form of 'node/', 'way/' or 'relation/'. + Defaults to an empty list. + + Returns: + Path: Path to the generated GeoParquet file. + """ + reader = PbfFileReader(tags_filter=tags_filter, geometry_filter=geometry_filter) + return reader.convert_pbf_to_gpq( + pbf_path=pbf_path, + result_file_path=result_file_path, + explode_tags=explode_tags, + ignore_cache=ignore_cache, + filter_osm_ids=filter_osm_ids, + ) + + +def get_features_gdf( + file_paths: Union[str, Path, Iterable[Union[str, Path]]], + tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None, + geometry_filter: Optional[BaseGeometry] = None, + explode_tags: Optional[bool] = None, + ignore_cache: bool = False, + filter_osm_ids: Optional[list[str]] = None, +) -> gpd.GeoDataFrame: + """ + Get features GeoDataFrame from a PBF file or list of PBF files. + + Function can parse multiple PBF files and returns a single GeoDataFrame with loaded + OSM objects. + + Args: + file_paths (Union[str, Path, Iterable[Union[str, Path]]]): + Path or list of paths of `*.osm.pbf` files to be parsed. + tags_filter (Union[OsmTagsFilter, GroupedOsmTagsFilter], optional): A dictionary + specifying which tags to download. + The keys should be OSM tags (e.g. 
`building`, `amenity`). + The values should either be `True` for retrieving all objects with the tag, + a string for retrieving a single tag-value pair + or a list of strings for retrieving all values specified in the list. + `tags={'leisure': 'park'}` would return parks from the area. + `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}` + would return parks, all amenity types, bakeries and bicycle shops. + If `None`, the handler will allow all of the tags to be parsed. Defaults to `None`. + geometry_filter (BaseGeometry, optional): Region which can be used to filter only + intersecting OSM objects. Defaults to `None`. + explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. + If `None`, will be set based on the tags_filter parameter. If no tags filter is provided, + then explode_tags will be set to `False`; if there is a tags filter, it will be set to `True`. + Defaults to `None`. + ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not. + Defaults to False. + filter_osm_ids (list[str], optional): List of OSM feature ids to read from the file. + Have to be in the form of 'node/<id>', 'way/<id>' or 'relation/<id>'. + Defaults to an empty list. + + Returns: + gpd.GeoDataFrame: GeoDataFrame with OSM features. + """ + reader = PbfFileReader(tags_filter=tags_filter, geometry_filter=geometry_filter) + return reader.get_features_gdf( + file_paths=file_paths, + explode_tags=explode_tags, + ignore_cache=ignore_cache, + filter_osm_ids=filter_osm_ids, + ) diff --git a/quackosm/pbf_file_reader.py b/quackosm/pbf_file_reader.py new file mode 100644 index 0000000..c334dc0 --- /dev/null +++ b/quackosm/pbf_file_reader.py @@ -0,0 +1,1529 @@ +""" +PBF File Reader. + +This module contains a reader capable of parsing a PBF file into a GeoDataFrame. +""" + +import hashlib +import json +import shutil +import tempfile +import warnings +from collections.abc import Iterable +from math import floor +from pathlib import Path +from typing import Any, Literal, NamedTuple, Optional, Union, cast + +import duckdb +import geoarrow.pyarrow as ga +import geopandas as gpd +import pyarrow as pa +import pyarrow.parquet as pq +import shapely.wkt as wktlib +from geoarrow.pyarrow import io +from shapely.geometry.base import BaseGeometry + +from quackosm._constants import FEATURES_INDEX, GEOMETRY_COLUMN, WGS84_CRS +from quackosm._osm_tags_filters import GroupedOsmTagsFilter, OsmTagsFilter, merge_osm_tags_filter +from quackosm._osm_way_polygon_features import ( + OSM_WAY_POLYGON_CONFIG_RAW, + OsmWayPolygonConfig, + parse_dict_to_config_object, +) +from quackosm._typing import is_expected_type + +__all__ = [ + "PbfFileReader", +] + + +class PbfFileReader: + """ + PbfFileReader. + + PBF (Protocolbuffer Binary Format)[1] file reader is a dedicated `*.osm.pbf` file reader + class based on DuckDB[2] and its spatial extension[3]. + + The handler can filter out OSM features based on a tags filter and a geometry filter + to limit the result. + + References: + 1. https://wiki.openstreetmap.org/wiki/PBF_Format + 2. https://duckdb.org/ + 3.
https://github.com/duckdb/duckdb_spatial + """ + + class ConvertedOSMParquetFiles(NamedTuple): + """List of parquet files read from the `*.osm.pbf` file.""" + + nodes_valid_with_tags: "duckdb.DuckDBPyRelation" + nodes_required_ids: "duckdb.DuckDBPyRelation" + nodes_filtered_ids: "duckdb.DuckDBPyRelation" + + ways_all_with_tags: "duckdb.DuckDBPyRelation" + ways_with_unnested_nodes_refs: "duckdb.DuckDBPyRelation" + ways_required_ids: "duckdb.DuckDBPyRelation" + ways_filtered_ids: "duckdb.DuckDBPyRelation" + + relations_all_with_tags: "duckdb.DuckDBPyRelation" + relations_with_unnested_way_refs: "duckdb.DuckDBPyRelation" + relations_filtered_ids: "duckdb.DuckDBPyRelation" + + class ParsedOSMFeatures(NamedTuple): + """Final list of parsed features from the `*.osm.pbf` file.""" + + nodes: "duckdb.DuckDBPyRelation" + ways: "duckdb.DuckDBPyRelation" + relations: "duckdb.DuckDBPyRelation" + + def __init__( + self, + tags_filter: Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]] = None, + geometry_filter: Optional[BaseGeometry] = None, + working_directory: Union[str, Path] = "files", + osm_way_polygon_features_config: Union[ + OsmWayPolygonConfig, dict[str, Any] + ] = OSM_WAY_POLYGON_CONFIG_RAW, + ) -> None: + """ + Initialize PbfFileReader. + + Args: + tags_filter (Union[OsmTagsFilter, GroupedOsmTagsFilter], optional): A dictionary + specifying which tags to download. + The keys should be OSM tags (e.g. `building`, `amenity`). + The values should either be `True` for retrieving all objects with the tag, + a string for retrieving a single tag-value pair + or a list of strings for retrieving all values specified in the list. + `tags={'leisure': 'park'}` would return parks from the area. + `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}` + would return parks, all amenity types, bakeries and bicycle shops. + If `None`, the handler will allow all of the tags to be parsed. Defaults to `None`. + geometry_filter (BaseGeometry, optional): Region which can be used to filter only + intersecting OSM objects. Defaults to `None`. + working_directory (Union[str, Path], optional): Directory where to save + the parsed `*.parquet` files. Defaults to "files". + osm_way_polygon_features_config (Union[OsmWayPolygonConfig, dict[str, Any]], optional): + Config used to determine which closed way features are polygons. + Modifications to this config are left for experienced OSM users. + Defaults to the predefined OSM_WAY_POLYGON_CONFIG_RAW. + """ + self.tags_filter = tags_filter + self.merged_tags_filter = merge_osm_tags_filter(tags_filter) if tags_filter else None + self.geometry_filter = geometry_filter + self.working_directory = Path(working_directory) + self.working_directory.mkdir(parents=True, exist_ok=True) + self.connection: duckdb.DuckDBPyConnection = None + self.rows_per_bucket = 1_000_000 + self.osm_way_polygon_features_config: OsmWayPolygonConfig = ( + osm_way_polygon_features_config + if isinstance(osm_way_polygon_features_config, OsmWayPolygonConfig) + else parse_dict_to_config_object(osm_way_polygon_features_config) + ) + + def get_features_gdf( + self, + file_paths: Union[str, Path, Iterable[Union[str, Path]]], + explode_tags: Optional[bool] = None, + ignore_cache: bool = False, + filter_osm_ids: Optional[list[str]] = None, + ) -> gpd.GeoDataFrame: + """ + Get features GeoDataFrame from a list of PBF files. + + Function parses multiple PBF files and returns a single GeoDataFrame with parsed + OSM objects.
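 + + Example: an illustrative sketch only (the two district PBF paths are placeholder file names, not test data from the repository): + + >>> from quackosm import PbfFileReader + >>> reader = PbfFileReader(tags_filter={"amenity": "bench"}) + >>> benches_gdf = reader.get_features_gdf(["district_1.osm.pbf", "district_2.osm.pbf"])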
+ + Args: + file_paths (Union[str, Path, Iterable[Union[str, Path]]]): + Path or list of paths of `*.osm.pbf` files to be parsed. + explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. + If `None`, will be set based on the tags_filter parameter. + If no tags filter is provided, then explode_tags will be set to `False`; + if there is a tags filter, it will be set to `True`. Defaults to `None`. + ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not. + Defaults to False. + filter_osm_ids (list[str], optional): List of OSM feature ids to read from the file. + Have to be in the form of 'node/<id>', 'way/<id>' or 'relation/<id>'. + Defaults to an empty list. + + Returns: + gpd.GeoDataFrame: GeoDataFrame with OSM features. + """ + if isinstance(file_paths, (str, Path)): + file_paths = [file_paths] + + if filter_osm_ids is None: + filter_osm_ids = [] + + if explode_tags is None: + explode_tags = self.tags_filter is not None + + parsed_geoparquet_files = [] + for file_path in file_paths: + parsed_geoparquet_file = self.convert_pbf_to_gpq( + file_path, + explode_tags=explode_tags, + ignore_cache=ignore_cache, + filter_osm_ids=filter_osm_ids, + ) + parsed_geoparquet_files.append(parsed_geoparquet_file) + + parquet_tables = [ + io.read_geoparquet_table(parsed_parquet_file) + for parsed_parquet_file in parsed_geoparquet_files + ] + joined_parquet_table: pa.Table = pa.concat_tables(parquet_tables) + gdf_parquet = gpd.GeoDataFrame( + data=joined_parquet_table.drop(GEOMETRY_COLUMN).to_pandas(maps_as_pydicts="strict"), + geometry=ga.to_geopandas(joined_parquet_table.column(GEOMETRY_COLUMN)), + ).set_index(FEATURES_INDEX) + + return gdf_parquet + + def convert_pbf_to_gpq( + self, + pbf_path: Union[str, Path], + result_file_path: Optional[Union[str, Path]] = None, + explode_tags: Optional[bool] = None, + ignore_cache: bool = False, + filter_osm_ids: Optional[list[str]] = None, + ) -> Path: + """ + Convert PBF file to GeoParquet file. + + Args: + pbf_path (Union[str, Path]): PBF file to be parsed to GeoParquet. + result_file_path (Union[str, Path], optional): Where to save + the geoparquet file. If not provided, will be generated based on hashes + from provided tags filter and geometry filter. Defaults to `None`. + explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. + If `None`, will be set based on the tags_filter parameter. + If no tags filter is provided, then explode_tags will be set to `False`; + if there is a tags filter, it will be set to `True`. Defaults to `None`. + ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not. + Defaults to False. + filter_osm_ids (list[str], optional): List of OSM feature ids to read from the file. + Have to be in the form of 'node/<id>', 'way/<id>' or 'relation/<id>'. + Defaults to an empty list. + + Returns: + Path: Path to the generated GeoParquet file.
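 + + Example: an illustrative sketch (the PBF file name and the bounding box are placeholder values chosen for this sketch, not project data): + + >>> from shapely.geometry import box + >>> from quackosm import PbfFileReader + >>> reader = PbfFileReader( + ... tags_filter={"leisure": "park", "shop": ["bakery", "bicycle"]}, + ... geometry_filter=box(7.40, 43.72, 7.44, 43.75), + ... ) + >>> gpq_path = reader.convert_pbf_to_gpq("monaco.osm.pbf", ignore_cache=True)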
+ """ + if filter_osm_ids is None: + filter_osm_ids = [] + + if explode_tags is None: + explode_tags = self.tags_filter is not None + + with tempfile.TemporaryDirectory(dir=self.working_directory.resolve()) as tmp_dir_name: + try: + self._set_up_duckdb_connection(tmp_dir_name) + result_file_path = result_file_path or self._generate_geoparquet_result_file_path( + pbf_path, + filter_osm_ids=filter_osm_ids, + explode_tags=explode_tags, + ) + parsed_geoparquet_file = self._parse_pbf_file( + pbf_path=pbf_path, + tmp_dir_name=tmp_dir_name, + result_file_path=Path(result_file_path), + filter_osm_ids=filter_osm_ids, + explode_tags=explode_tags, + ignore_cache=ignore_cache, + ) + return parsed_geoparquet_file + finally: + if self.connection is not None: + self.connection.close() + self.connection = None + + def _set_up_duckdb_connection(self, tmp_dir_name: str) -> None: + self.connection = duckdb.connect(database=str(Path(tmp_dir_name) / "db.duckdb")) + for extension_name in ("parquet", "spatial"): + self.connection.install_extension(extension_name) + self.connection.load_extension(extension_name) + + self.connection.sql(""" + CREATE OR REPLACE MACRO linestring_to_linestring_wkt(ls) AS + 'LINESTRING (' || array_to_string([pt.x || ' ' || pt.y for pt in ls], ', ') || ')'; + """) + self.connection.sql(""" + CREATE OR REPLACE MACRO linestring_to_polygon_wkt(ls) AS + 'POLYGON ((' || array_to_string([pt.x || ' ' || pt.y for pt in ls], ', ') || '))'; + """) + + def _parse_pbf_file( + self, + pbf_path: Union[str, Path], + tmp_dir_name: str, + result_file_path: Path, + filter_osm_ids: list[str], + explode_tags: bool = True, + ignore_cache: bool = False, + ) -> Path: + if not result_file_path.exists() or ignore_cache: + elements = self.connection.sql(f"SELECT * FROM ST_READOSM('{Path(pbf_path)}');") + converted_osm_parquet_files = self._prefilter_elements_ids( + elements, tmp_dir_name, filter_osm_ids + ) + + self._delete_directories( + tmp_dir_name, + [ + "nodes_filtered_non_distinct_ids", + "nodes_prepared_ids", + "ways_valid_ids", + "ways_filtered_non_distinct_ids", + "relations_valid_ids", + "relations_ids", + ], + ) + + filtered_nodes_with_geometry = self._get_filtered_nodes_with_geometry( + converted_osm_parquet_files, tmp_dir_name + ) + self._delete_directories(tmp_dir_name, "nodes_filtered_ids") + + required_nodes_with_structs = self._get_required_nodes_with_structs( + converted_osm_parquet_files, tmp_dir_name + ) + self._delete_directories( + tmp_dir_name, + [ + "nodes_valid_with_tags", + "nodes_required_ids", + ], + ) + + required_ways_with_linestrings = self._get_required_ways_with_linestrings( + converted_osm_parquet_files, required_nodes_with_structs, tmp_dir_name + ) + self._delete_directories( + tmp_dir_name, + [ + "ways_required_ids_grouped", + "ways_required_ids", + "ways_with_unnested_nodes_refs", + "required_nodes_with_points", + ], + ) + + filtered_ways_with_proper_geometry = self._get_filtered_ways_with_proper_geometry( + converted_osm_parquet_files, required_ways_with_linestrings, tmp_dir_name + ) + self._delete_directories( + tmp_dir_name, + [ + "ways_prepared_ids", + "ways_all_with_tags", + ], + ) + + filtered_relations_with_geometry = self._get_filtered_relations_with_geometry( + converted_osm_parquet_files, required_ways_with_linestrings, tmp_dir_name + ) + self._delete_directories( + tmp_dir_name, + [ + "relations_all_with_tags", + "relations_with_unnested_way_refs", + "relations_filtered_ids", + "required_ways_with_linestrings", + "valid_relation_parts", + "relation_inner_parts", + 
"relation_outer_parts", + "relation_outer_parts_with_holes", + "relation_outer_parts_without_holes", + ], + ) + + self._concatenate_results_to_geoparquet( + PbfFileReader.ParsedOSMFeatures( + nodes=filtered_nodes_with_geometry, + ways=filtered_ways_with_proper_geometry, + relations=filtered_relations_with_geometry, + ), + tmp_dir_name=tmp_dir_name, + save_file_path=result_file_path, + explode_tags=explode_tags, + ) + + return result_file_path + + def _generate_geoparquet_result_file_path( + self, + pbf_file_path: Union[str, Path], + explode_tags: bool, + filter_osm_ids: list[str], + ) -> Path: + pbf_file_name = Path(pbf_file_path).name.removesuffix(".osm.pbf") + + osm_filter_tags_hash_part = "nofilter" + if self.tags_filter is not None: + h = hashlib.new("sha256") + h.update(json.dumps(self.tags_filter).encode()) + osm_filter_tags_hash_part = h.hexdigest() + + clipping_geometry_hash_part = "noclip" + if self.geometry_filter is not None: + h = hashlib.new("sha256") + h.update(wktlib.dumps(self.geometry_filter).encode()) + clipping_geometry_hash_part = h.hexdigest() + + exploded_tags_part = "exploded" if explode_tags else "compact" + + filter_osm_ids_hash_part = "" + if filter_osm_ids: + h = hashlib.new("sha256") + h.update(json.dumps(sorted(set(filter_osm_ids))).encode()) + filter_osm_ids_hash_part = f"_{h.hexdigest()}" + + result_file_name = ( + f"{pbf_file_name}_{osm_filter_tags_hash_part}" + f"_{clipping_geometry_hash_part}_{exploded_tags_part}{filter_osm_ids_hash_part}.geoparquet" + ) + return Path(self.working_directory) / result_file_name + + def _prefilter_elements_ids( + self, elements: "duckdb.DuckDBPyRelation", tmp_dir_name: str, filter_osm_ids: list[str] + ) -> ConvertedOSMParquetFiles: + sql_filter = self._generate_osm_tags_sql_filter() + filtered_tags_clause = self._generate_filtered_tags_clause() + + is_intersecting = self.geometry_filter is not None + + nodes_prepared_ids_path = Path(tmp_dir_name) / "nodes_prepared_ids" + nodes_prepared_ids_path.mkdir(parents=True, exist_ok=True) + + ways_prepared_ids_path = Path(tmp_dir_name) / "ways_prepared_ids" + ways_prepared_ids_path.mkdir(parents=True, exist_ok=True) + + # NODES - VALID (NV) + # - select all with kind = 'node' + # - select all with lat and lon not empty + nodes_valid_with_tags = self._sql_to_parquet_file( + sql_query=f""" + SELECT + id, + {filtered_tags_clause}, + lon, + lat + FROM ({elements.sql_query()}) + WHERE kind = 'node' + AND lat IS NOT NULL AND lon IS NOT NULL + """, + file_path=Path(tmp_dir_name) / "nodes_valid_with_tags", + ) + # NODES - INTERSECTING (NI) + # - select all from NV which intersect given geometry filter + # NODES - FILTERED (NF) + # - select all from NI with tags filter + filter_osm_node_ids_filter = self._generate_elements_filter(filter_osm_ids, "node") + if is_intersecting: + wkt = cast(BaseGeometry, self.geometry_filter).wkt + intersection_filter = f"ST_Intersects(ST_Point(lon, lat), ST_GeomFromText('{wkt}'))" + nodes_intersecting_ids = self._sql_to_parquet_file( + sql_query=f""" + SELECT DISTINCT id FROM ({nodes_valid_with_tags.sql_query()}) n + WHERE {intersection_filter} = true + """, + file_path=Path(tmp_dir_name) / "nodes_intersecting_ids", + ) + self._sql_to_parquet_file( + sql_query=f""" + SELECT id FROM ({nodes_valid_with_tags.sql_query()}) n + SEMI JOIN ({nodes_intersecting_ids.sql_query()}) ni ON n.id = ni.id + WHERE tags IS NOT NULL AND cardinality(tags) > 0 AND ({sql_filter}) + AND ({filter_osm_node_ids_filter}) + """, + file_path=Path(tmp_dir_name) / 
"nodes_filtered_non_distinct_ids", + ) + else: + nodes_intersecting_ids = nodes_valid_with_tags + self._sql_to_parquet_file( + sql_query=f""" + SELECT id FROM ({nodes_valid_with_tags.sql_query()}) n + WHERE tags IS NOT NULL AND cardinality(tags) > 0 AND ({sql_filter}) + AND ({filter_osm_node_ids_filter}) + """, + file_path=Path(tmp_dir_name) / "nodes_filtered_non_distinct_ids", + ) + nodes_filtered_ids = self._calculate_unique_ids_to_parquet( + Path(tmp_dir_name) / "nodes_filtered_non_distinct_ids", + Path(tmp_dir_name) / "nodes_filtered_ids", + ) + + # WAYS - VALID (WV) + # - select all with kind = 'way' + # - select all with more then one ref + # - join all NV to refs + # - select all where all refs has been joined (total_refs == found_refs) + self.connection.sql(f""" + SELECT * + FROM ({elements.sql_query()}) w + WHERE kind = 'way' AND len(refs) >= 2 + """).to_view("ways", replace=True) + ways_all_with_tags = self._sql_to_parquet_file( + sql_query=f""" + WITH filtered_tags AS ( + SELECT id, {filtered_tags_clause}, tags as raw_tags + FROM ways w + WHERE tags IS NOT NULL AND cardinality(tags) > 0 + ) + SELECT id, tags, raw_tags + FROM filtered_tags + WHERE tags IS NOT NULL AND cardinality(tags) > 0 + """, + file_path=Path(tmp_dir_name) / "ways_all_with_tags", + ) + ways_with_unnested_nodes_refs = self._sql_to_parquet_file( + sql_query=""" + SELECT w.id, UNNEST(refs) as ref, UNNEST(range(length(refs))) as ref_idx + FROM ways w + """, + file_path=Path(tmp_dir_name) / "ways_with_unnested_nodes_refs", + ) + ways_valid_ids = self._sql_to_parquet_file( + sql_query=f""" + WITH total_ways_with_nodes_refs AS ( + SELECT id, ref + FROM ({ways_with_unnested_nodes_refs.sql_query()}) + ), + unmatched_ways_with_nodes_refs AS ( + SELECT id, ref + FROM ({ways_with_unnested_nodes_refs.sql_query()}) w + ANTI JOIN ({nodes_valid_with_tags.sql_query()}) nv ON nv.id = w.ref + ) + SELECT DISTINCT id + FROM total_ways_with_nodes_refs + EXCEPT + SELECT DISTINCT id + FROM unmatched_ways_with_nodes_refs + """, + file_path=Path(tmp_dir_name) / "ways_valid_ids", + ) + # WAYS - INTERSECTING (WI) + # - select all from WV with joining any from NV on ref + if is_intersecting: + ways_intersecting_ids = self._sql_to_parquet_file( + sql_query=f""" + SELECT DISTINCT uwr.id + FROM ({ways_with_unnested_nodes_refs.sql_query()}) uwr + SEMI JOIN ({ways_valid_ids.sql_query()}) wv ON uwr.id = wv.id + SEMI JOIN ({nodes_intersecting_ids.sql_query()}) n ON n.id = uwr.ref + """, + file_path=Path(tmp_dir_name) / "ways_intersecting_ids", + ) + else: + ways_intersecting_ids = ways_valid_ids + # WAYS - FILTERED (WF) + # - select all from WI with tags filter + filter_osm_way_ids_filter = self._generate_elements_filter(filter_osm_ids, "way") + self._sql_to_parquet_file( + sql_query=f""" + SELECT id FROM ({ways_all_with_tags.sql_query()}) w + SEMI JOIN ({ways_intersecting_ids.sql_query()}) wi ON w.id = wi.id + WHERE ({sql_filter}) AND ({filter_osm_way_ids_filter}) + """, + file_path=Path(tmp_dir_name) / "ways_filtered_non_distinct_ids", + ) + ways_filtered_ids = self._calculate_unique_ids_to_parquet( + Path(tmp_dir_name) / "ways_filtered_non_distinct_ids", + ways_prepared_ids_path / "filtered", + ) + + # RELATIONS - VALID (RV) + # - select all with kind = 'relation' + # - select all with more then one ref + # - select all with type in ['boundary', 'multipolygon'] + # - join all WV to refs + # - select all where all refs has been joined (total_refs == found_refs) + self.connection.sql(f""" + SELECT * + FROM ({elements.sql_query()}) + WHERE kind 
= 'relation' AND len(refs) > 0 + AND list_contains(map_keys(tags), 'type') + AND list_has_any(map_extract(tags, 'type'), ['boundary', 'multipolygon']) + """).to_view("relations", replace=True) + relations_all_with_tags = self._sql_to_parquet_file( + sql_query=f""" + WITH filtered_tags AS ( + SELECT id, {filtered_tags_clause} + FROM relations r + WHERE tags IS NOT NULL AND cardinality(tags) > 0 + ) + SELECT id, tags + FROM filtered_tags + WHERE tags IS NOT NULL AND cardinality(tags) > 0 + """, + file_path=Path(tmp_dir_name) / "relations_all_with_tags", + ) + relations_with_unnested_way_refs = self._sql_to_parquet_file( + sql_query=""" + WITH unnested_relation_refs AS ( + SELECT + r.id, + UNNEST(refs) as ref, + UNNEST(ref_types) as ref_type, + UNNEST(ref_roles) as ref_role, + UNNEST(range(length(refs))) as ref_idx + FROM relations r + ) + SELECT id, ref, ref_role, ref_idx + FROM unnested_relation_refs + WHERE ref_type = 'way' + """, + file_path=Path(tmp_dir_name) / "relations_with_unnested_way_refs", + ) + relations_valid_ids = self._sql_to_parquet_file( + sql_query=f""" + WITH total_relation_refs AS ( + SELECT id, ref + FROM ({relations_with_unnested_way_refs.sql_query()}) frr + ), + unmatched_relation_refs AS ( + SELECT id, ref + FROM ({relations_with_unnested_way_refs.sql_query()}) r + ANTI JOIN ({ways_valid_ids.sql_query()}) wv ON wv.id = r.ref + ) + SELECT DISTINCT id + FROM total_relation_refs + EXCEPT + SELECT DISTINCT id + FROM unmatched_relation_refs + """, + file_path=Path(tmp_dir_name) / "relations_valid_ids", + ) + # RELATIONS - INTERSECTING (RI) + # - select all from RW with joining any from RV on ref + if is_intersecting: + relations_intersecting_ids = self._sql_to_parquet_file( + sql_query=f""" + SELECT frr.id + FROM ({relations_with_unnested_way_refs.sql_query()}) frr + SEMI JOIN ({relations_valid_ids.sql_query()}) rv ON frr.id = rv.id + SEMI JOIN ({ways_intersecting_ids.sql_query()}) wi ON wi.id = frr.ref + """, + file_path=Path(tmp_dir_name) / "relations_intersecting_ids", + ) + else: + relations_intersecting_ids = relations_valid_ids + # RELATIONS - FILTERED (RF) + # - select all from RI with tags filter + filter_osm_relation_ids_filter = self._generate_elements_filter(filter_osm_ids, "relation") + + relations_ids_path = Path(tmp_dir_name) / "relations_ids" + relations_ids_path.mkdir(parents=True, exist_ok=True) + self._sql_to_parquet_file( + sql_query=f""" + SELECT id FROM ({relations_all_with_tags.sql_query()}) r + SEMI JOIN ({relations_intersecting_ids.sql_query()}) ri ON r.id = ri.id + WHERE ({sql_filter}) AND ({filter_osm_relation_ids_filter}) + """, + file_path=relations_ids_path / "filtered", + ) + relations_filtered_ids = self._calculate_unique_ids_to_parquet( + relations_ids_path / "filtered", Path(tmp_dir_name) / "relations_filtered_ids" + ) + + # WAYS - REQUIRED (WR) + # - required - all IDs from WF + # + all needed to construct relations from RF + self._sql_to_parquet_file( + sql_query=f""" + SELECT ref as id + FROM ({relations_with_unnested_way_refs.sql_query()}) frr + SEMI JOIN ({relations_filtered_ids.sql_query()}) fri ON fri.id = frr.id + """, + file_path=ways_prepared_ids_path / "required_by_relations", + ) + ways_required_ids = self._calculate_unique_ids_to_parquet( + ways_prepared_ids_path, Path(tmp_dir_name) / "ways_required_ids" + ) + + # NODES - REQUIRED (WR) + # - required - all IDs from NF + # + all needed to construct ways from WR + # + and needed to construct ways from WF + self._sql_to_parquet_file( + sql_query=f""" + SELECT ref as id + FROM 
({ways_with_unnested_nodes_refs.sql_query()}) uwr + SEMI JOIN ({ways_required_ids.sql_query()}) rwi ON rwi.id = uwr.id + """, + file_path=nodes_prepared_ids_path / "required_by_relations", + ) + self._sql_to_parquet_file( + sql_query=f""" + SELECT ref as id + FROM ({ways_with_unnested_nodes_refs.sql_query()}) uwr + SEMI JOIN ({ways_filtered_ids.sql_query()}) fwi ON fwi.id = uwr.id + """, + file_path=nodes_prepared_ids_path / "required_by_ways", + ) + nodes_required_ids = self._calculate_unique_ids_to_parquet( + nodes_prepared_ids_path, Path(tmp_dir_name) / "nodes_required_ids" + ) + + return PbfFileReader.ConvertedOSMParquetFiles( + nodes_valid_with_tags=nodes_valid_with_tags, + nodes_required_ids=nodes_required_ids, + nodes_filtered_ids=nodes_filtered_ids, + ways_all_with_tags=ways_all_with_tags, + ways_with_unnested_nodes_refs=ways_with_unnested_nodes_refs, + ways_required_ids=ways_required_ids, + ways_filtered_ids=ways_filtered_ids, + relations_all_with_tags=relations_all_with_tags, + relations_with_unnested_way_refs=relations_with_unnested_way_refs, + relations_filtered_ids=relations_filtered_ids, + ) + + def _delete_directories( + self, tmp_dir_name: Union[Path, str], directories: Union[str, list[str]] + ) -> None: + if isinstance(directories, str): + directories = [directories] + for directory in directories: + directory_path = Path(tmp_dir_name) / directory + if not directory_path.exists(): + continue + shutil.rmtree(directory_path) + + def _generate_osm_tags_sql_filter(self) -> str: + """Prepare features filter clauses based on tags filter.""" + filter_clauses = ["(1=1)"] + + if self.merged_tags_filter: + filter_clauses.clear() + + for filter_tag_key, filter_tag_value in self.merged_tags_filter.items(): + if isinstance(filter_tag_value, bool) and filter_tag_value: + filter_clauses.append(f"(list_contains(map_keys(tags), '{filter_tag_key}'))") + elif isinstance(filter_tag_value, str): + escaped_value = self._sql_escape(filter_tag_value) + filter_clauses.append( + f"list_extract(map_extract(tags, '{filter_tag_key}'), 1) =" + f" '{escaped_value}'" + ) + elif isinstance(filter_tag_value, list) and filter_tag_value: + values_list = [f"'{self._sql_escape(value)}'" for value in filter_tag_value] + filter_clauses.append( + f"list_extract(map_extract(tags, '{filter_tag_key}'), 1) IN" + f" ({', '.join(values_list)})" + ) + + return " OR ".join(filter_clauses) + + def _generate_filtered_tags_clause(self) -> str: + """Prepare filtered tags clause by removing tags commonly ignored by OGR.""" + tags_to_ignore = [ + "area", + "created_by", + "converted_by", + "source", + "time", + "ele", + "note", + "todo", + "fixme", + "FIXME", + "openGeoDB:", + ] + escaped_tags_to_ignore = [f"'{tag}'" for tag in tags_to_ignore] + + return f""" + map_from_entries( + [ + tag_entry + for tag_entry in map_entries(tags) + if not tag_entry.key in ({','.join(escaped_tags_to_ignore)}) + and not starts_with(tag_entry.key, 'openGeoDB:') + ] + ) as tags + """ + + def _generate_elements_filter( + self, filter_osm_ids: list[str], element_type: Literal["node", "way", "relation"] + ) -> str: + filter_osm_relation_ids = [ + osm_id.replace(f"{element_type}/", "") + for osm_id in filter_osm_ids + if osm_id.startswith(f"{element_type}/") + ] + if not filter_osm_ids: + filter_osm_ids_filter = "1=1" + elif filter_osm_relation_ids: + filter_osm_ids_filter = f"id in ({','.join(filter_osm_relation_ids)})" + else: + filter_osm_ids_filter = "id IS NULL" + + return filter_osm_ids_filter + + def _sql_escape(self, value: str) -> str: + 
"""Escape value for SQL query.""" + return value.replace("'", "''") + + def _sql_to_parquet_file(self, sql_query: str, file_path: Path) -> "duckdb.DuckDBPyRelation": + relation = self.connection.sql(sql_query) + return self._save_parquet_file(relation, file_path) + + def _save_parquet_file( + self, relation: "duckdb.DuckDBPyRelation", file_path: Path + ) -> "duckdb.DuckDBPyRelation": + self.connection.sql(f""" + COPY ( + SELECT * FROM ({relation.sql_query()}) + ) TO '{file_path}' (FORMAT 'parquet', PER_THREAD_OUTPUT true, ROW_GROUP_SIZE 25000) + """) + return self.connection.sql(f""" + SELECT * FROM read_parquet('{file_path}/**') + """) + + def _calculate_unique_ids_to_parquet( + self, file_path: Path, result_path: Optional[Path] = None + ) -> "duckdb.DuckDBPyRelation": + if result_path is None: + result_path = file_path / "distinct" + + self.connection.sql(f""" + COPY ( + SELECT id FROM read_parquet('{file_path}/**') GROUP BY id + ) TO '{result_path}' (FORMAT 'parquet', PER_THREAD_OUTPUT true, ROW_GROUP_SIZE 25000) + """) + + return self.connection.sql(f""" + SELECT * FROM read_parquet('{result_path}/**') + """) + + def _get_filtered_nodes_with_geometry( + self, + osm_parquet_files: ConvertedOSMParquetFiles, + tmp_dir_name: str, + ) -> "duckdb.DuckDBPyRelation": + nodes_with_geometry = self.connection.sql(f""" + SELECT + n.id, + n.tags, + ST_Point(round(n.lon, 7), round(n.lat, 7)) geometry + FROM ({osm_parquet_files.nodes_valid_with_tags.sql_query()}) n + SEMI JOIN ({osm_parquet_files.nodes_filtered_ids.sql_query()}) fn ON n.id = fn.id + """) + nodes_parquet = self._save_parquet_file_with_geometry( + relation=nodes_with_geometry, + file_path=Path(tmp_dir_name) / "filtered_nodes_with_geometry", + ) + return nodes_parquet + + def _get_required_nodes_with_structs( + self, + osm_parquet_files: ConvertedOSMParquetFiles, + tmp_dir_name: str, + ) -> "duckdb.DuckDBPyRelation": + nodes_with_structs = self.connection.sql(f""" + SELECT + n.id, + struct_pack(x := round(n.lon, 7), y := round(n.lat, 7))::POINT_2D point + FROM ({osm_parquet_files.nodes_valid_with_tags.sql_query()}) n + SEMI JOIN ({osm_parquet_files.nodes_required_ids.sql_query()}) rn ON n.id = rn.id + """) + nodes_parquet = self._save_parquet_file( + relation=nodes_with_structs, + file_path=Path(tmp_dir_name) / "required_nodes_with_points", + ) + return nodes_parquet + + def _get_required_ways_with_linestrings( + self, + osm_parquet_files: ConvertedOSMParquetFiles, + required_nodes_with_structs: "duckdb.DuckDBPyRelation", + tmp_dir_name: str, + ) -> "duckdb.DuckDBPyRelation": + total_required_ways = osm_parquet_files.ways_required_ids.count("id").fetchone()[0] + + required_ways_with_linestrings_path = Path(tmp_dir_name) / "required_ways_with_linestrings" + required_ways_with_linestrings_path.mkdir(parents=True, exist_ok=True) + + if total_required_ways == 0: + empty_file_path = str(required_ways_with_linestrings_path / "empty.parquet") + self.connection.sql("CREATE OR REPLACE TABLE x(id STRING, linestring LINESTRING_2D);") + self.connection.table("x").to_parquet(empty_file_path) + return self.connection.read_parquet(empty_file_path) + + groups = floor(total_required_ways / self.rows_per_bucket) + grouped_required_ways_ids_path = Path(tmp_dir_name) / "ways_required_ids_grouped" + self.connection.sql(f""" + COPY ( + SELECT + *, + floor( + row_number() OVER (ORDER BY id) / {self.rows_per_bucket} + )::INTEGER as "group", + FROM ({osm_parquet_files.ways_required_ids.sql_query()}) + ) TO '{grouped_required_ways_ids_path}' + (FORMAT 'parquet', 
PARTITION_BY ("group"), ROW_GROUP_SIZE 25000) + """) + + for group in range(groups + 1): + current_required_ways_ids_group_path = grouped_required_ways_ids_path / f"group={group}" + current_required_ways_ids_group_relation = self.connection.sql(f""" + SELECT * FROM read_parquet('{current_required_ways_ids_group_path}/**') + """) + + ways_with_linestrings = self.connection.sql(f""" + SELECT id, list(point ORDER BY ref_idx ASC)::LINESTRING_2D linestring + FROM ( + SELECT w.id, n.point, w.ref_idx + FROM ({osm_parquet_files.ways_with_unnested_nodes_refs.sql_query()}) w + SEMI JOIN ({current_required_ways_ids_group_relation.sql_query()}) rw + ON w.id = rw.id + JOIN ({required_nodes_with_structs.sql_query()}) n + ON n.id = w.ref + ) + GROUP BY id + """) + self._save_parquet_file( + relation=ways_with_linestrings, + file_path=required_ways_with_linestrings_path / f"group={group}", + ) + + ways_parquet = self.connection.sql(f""" + SELECT * FROM read_parquet('{required_ways_with_linestrings_path}/**') + """) + return ways_parquet + + def _get_filtered_ways_with_proper_geometry( + self, + osm_parquet_files: ConvertedOSMParquetFiles, + required_ways_with_linestrings: "duckdb.DuckDBPyRelation", + tmp_dir_name: str, + ) -> "duckdb.DuckDBPyRelation": + osm_way_polygon_features_filter_clauses = [ + "list_contains(map_keys(raw_tags), 'area') AND " + "list_extract(map_extract(raw_tags, 'area'), 1) = 'yes'" + ] + + for osm_tag_key in self.osm_way_polygon_features_config.all: + osm_way_polygon_features_filter_clauses.append( + f"list_contains(map_keys(raw_tags), '{osm_tag_key}')" + ) + + for osm_tag_key, osm_tag_values in self.osm_way_polygon_features_config.allowlist.items(): + escaped_values = ",".join( + [f"'{self._sql_escape(osm_tag_value)}'" for osm_tag_value in osm_tag_values] + ) + osm_way_polygon_features_filter_clauses.append( + f"list_contains(map_keys(raw_tags), '{osm_tag_key}') AND" + f" list_has_any(map_extract(raw_tags, '{osm_tag_key}'), [{escaped_values}])" + ) + + for osm_tag_key, osm_tag_values in self.osm_way_polygon_features_config.denylist.items(): + escaped_values = ",".join( + [f"'{self._sql_escape(osm_tag_value)}'" for osm_tag_value in osm_tag_values] + ) + osm_way_polygon_features_filter_clauses.append( + f"list_contains(map_keys(raw_tags), '{osm_tag_key}') AND NOT" + f" list_has_any(map_extract(raw_tags, '{osm_tag_key}'), [{escaped_values}])" + ) + + ways_with_proper_geometry = self.connection.sql(f""" + WITH required_ways_with_linestrings AS ( + SELECT + w.id, + w.tags, + w_l.linestring, + -- Filter below is based on `_is_closed_way_a_polygon` function from OSMnx + -- Filter values are built dynamically from a config. 
+ ( + -- if first and last nodes are the same + ST_Equals(linestring[1]::POINT_2D, linestring[-1]::POINT_2D) + -- if the element doesn't have any tags leave it as a Linestring + AND raw_tags IS NOT NULL + -- if the element is specifically tagged 'area':'no' -> LineString + AND NOT ( + list_contains(map_keys(raw_tags), 'area') + AND list_extract(map_extract(raw_tags, 'area'), 1) = 'no' + ) + AND ({' OR '.join(osm_way_polygon_features_filter_clauses)}) + ) AS is_polygon + FROM ({required_ways_with_linestrings.sql_query()}) w_l + SEMI JOIN ({osm_parquet_files.ways_filtered_ids.sql_query()}) fw ON w_l.id = fw.id + JOIN ({osm_parquet_files.ways_all_with_tags.sql_query()}) w ON w.id = w_l.id + ), + proper_geometries AS ( + SELECT + id, + tags, + (CASE + WHEN is_polygon + THEN linestring_to_polygon_wkt(linestring) + ELSE linestring_to_linestring_wkt(linestring) + END)::GEOMETRY AS geometry + FROM + required_ways_with_linestrings w + ) + SELECT id, tags, geometry FROM proper_geometries + """) + ways_parquet = self._save_parquet_file_with_geometry( + relation=ways_with_proper_geometry, + file_path=Path(tmp_dir_name) / "filtered_ways_with_geometry", + ) + return ways_parquet + + def _get_filtered_relations_with_geometry( + self, + osm_parquet_files: ConvertedOSMParquetFiles, + required_ways_with_linestrings: "duckdb.DuckDBPyRelation", + tmp_dir_name: str, + ) -> "duckdb.DuckDBPyRelation": + valid_relation_parts = self.connection.sql(f""" + WITH unnested_relations AS ( + SELECT + r.id, + COALESCE(r.ref_role, 'outer') as ref_role, + r.ref, + linestring_to_linestring_wkt(w.linestring)::GEOMETRY as geometry + FROM ({osm_parquet_files.relations_with_unnested_way_refs.sql_query()}) r + SEMI JOIN ({osm_parquet_files.relations_filtered_ids.sql_query()}) fr + ON r.id = fr.id + JOIN ({required_ways_with_linestrings.sql_query()}) w + ON w.id = r.ref + ORDER BY r.id, r.ref_idx + ), + any_outer_refs AS ( + SELECT id, bool_or(ref_role == 'outer') any_outer_refs + FROM unnested_relations + GROUP BY id + ), + relations_with_geometries AS ( + SELECT + x.id, + CASE WHEN aor.any_outer_refs + THEN x.ref_role ELSE 'outer' + END as ref_role, + x.geom geometry, + row_number() OVER (PARTITION BY x.id) as geometry_id + FROM ( + SELECT + id, + ref_role, + UNNEST( + ST_Dump(ST_LineMerge(ST_Collect(list(geometry)))), recursive := true + ), + FROM unnested_relations + GROUP BY id, ref_role + ) x + JOIN any_outer_refs aor ON aor.id = x.id + WHERE ST_NPoints(geom) >= 4 + ), + valid_relations AS ( + SELECT id, is_valid + FROM ( + SELECT + id, + bool_and( + ST_Equals(ST_StartPoint(geometry), ST_EndPoint(geometry)) + ) is_valid + FROM relations_with_geometries + GROUP BY id + ) + WHERE is_valid = true + ) + SELECT * FROM relations_with_geometries + SEMI JOIN valid_relations ON relations_with_geometries.id = valid_relations.id + """) + valid_relation_parts_parquet = self._save_parquet_file_with_geometry( + relation=valid_relation_parts, + file_path=Path(tmp_dir_name) / "valid_relation_parts", + ) + relation_inner_parts = self.connection.sql(f""" + SELECT id, geometry_id, ST_MakePolygon(geometry) geometry + FROM ({valid_relation_parts_parquet.sql_query()}) + WHERE ref_role = 'inner' + """) + relation_inner_parts_parquet = self._save_parquet_file_with_geometry( + relation=relation_inner_parts, + file_path=Path(tmp_dir_name) / "relation_inner_parts", + fix_geometries=True, + ) + relation_outer_parts = self.connection.sql(f""" + SELECT id, geometry_id, ST_MakePolygon(geometry) geometry + FROM ({valid_relation_parts_parquet.sql_query()}) 
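+            -- Only parts with the 'outer' role are polygonized here; 'inner' parts were
+            -- polygonized above and are later subtracted from matching outer polygons as holes.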
+ WHERE ref_role = 'outer' + """) + relation_outer_parts_parquet = self._save_parquet_file_with_geometry( + relation=relation_outer_parts, + file_path=Path(tmp_dir_name) / "relation_outer_parts", + fix_geometries=True, + ) + relation_outer_parts_with_holes = self.connection.sql(f""" + SELECT + og.id, + og.geometry_id, + ST_Difference(any_value(og.geometry), ST_Union_Agg(ig.geometry)) geometry + FROM ({relation_outer_parts_parquet.sql_query()}) og + JOIN ({relation_inner_parts_parquet.sql_query()}) ig + ON og.id = ig.id AND ST_WITHIN(ig.geometry, og.geometry) + GROUP BY og.id, og.geometry_id + """) + relation_outer_parts_with_holes_parquet = self._save_parquet_file_with_geometry( + relation=relation_outer_parts_with_holes, + file_path=Path(tmp_dir_name) / "relation_outer_parts_with_holes", + ) + relation_outer_parts_without_holes = self.connection.sql(f""" + SELECT + og.id, + og.geometry_id, + og.geometry + FROM ({relation_outer_parts_parquet.sql_query()}) og + ANTI JOIN ({relation_outer_parts_with_holes_parquet.sql_query()}) ogwh + ON og.id = ogwh.id AND og.geometry_id = ogwh.geometry_id + """) + relation_outer_parts_without_holes_parquet = self._save_parquet_file_with_geometry( + relation=relation_outer_parts_without_holes, + file_path=Path(tmp_dir_name) / "relation_outer_parts_without_holes", + ) + relations_with_geometry = self.connection.sql(f""" + WITH unioned_outer_geometries AS ( + SELECT id, geometry + FROM ({relation_outer_parts_with_holes_parquet.sql_query()}) + UNION ALL + SELECT id, geometry + FROM ({relation_outer_parts_without_holes_parquet.sql_query()}) + ), + final_geometries AS ( + SELECT id, ST_Union_Agg(geometry) geometry + FROM unioned_outer_geometries + GROUP BY id + ) + SELECT r_g.id, r.tags, r_g.geometry + FROM final_geometries r_g + JOIN ({osm_parquet_files.relations_all_with_tags.sql_query()}) r + ON r.id = r_g.id + """) + relations_parquet = self._save_parquet_file_with_geometry( + relation=relations_with_geometry, + file_path=Path(tmp_dir_name) / "filtered_relations_with_geometry", + ) + return relations_parquet + + def _save_parquet_file_with_geometry( + self, relation: "duckdb.DuckDBPyRelation", file_path: Path, fix_geometries: bool = False + ) -> "duckdb.DuckDBPyRelation": + if not fix_geometries: + self.connection.sql(f""" + COPY ( + SELECT + * EXCLUDE (geometry), ST_AsWKB(geometry) geometry_wkb + FROM ({relation.sql_query()}) + ) TO '{file_path}' (FORMAT 'parquet', PER_THREAD_OUTPUT true, ROW_GROUP_SIZE 25000) + """) + else: + valid_path = file_path / "valid" + invalid_path = file_path / "invalid" + fixed_path = file_path / "fixed" + + valid_path.mkdir(parents=True, exist_ok=True) + invalid_path.mkdir(parents=True, exist_ok=True) + fixed_path.mkdir(parents=True, exist_ok=True) + + # Save valid features + self.connection.sql(f""" + COPY ( + SELECT + * EXCLUDE (geometry), ST_AsWKB(geometry) geometry_wkb + FROM ({relation.sql_query()}) + WHERE ST_IsValid(geometry) + ) TO '{valid_path}' (FORMAT 'parquet', PER_THREAD_OUTPUT true, ROW_GROUP_SIZE 25000) + """) + + # Save invalid features + self.connection.sql(f""" + COPY ( + SELECT + * EXCLUDE (geometry), ST_AsWKB(geometry) geometry_wkb, + floor( + row_number() OVER () / {self.rows_per_bucket} + )::INTEGER as "group", + FROM ({relation.sql_query()}) + WHERE NOT ST_IsValid(geometry) + ) TO '{invalid_path}' ( + FORMAT 'parquet', PARTITION_BY ("group"), ROW_GROUP_SIZE 25000 + ) + """) + + # Fix invalid features + group_id = 0 + current_invalid_features_group_path = invalid_path / f"group={group_id}" + while 
current_invalid_features_group_path.exists(): + current_invalid_features_group_table = pq.read_table( + current_invalid_features_group_path + ).drop("group") + valid_geometry_column = ga.as_wkb( + ga.as_geoarrow( + ga.to_geopandas( + ga.with_crs( + current_invalid_features_group_table.column("geometry_wkb"), + WGS84_CRS, + ) + ).make_valid() + ) + ) + current_invalid_features_group_table = current_invalid_features_group_table.drop( + "geometry_wkb" + ) + + current_invalid_features_group_table = ( + current_invalid_features_group_table.append_column( + "geometry_wkb", valid_geometry_column + ) + ) + pq.write_table( + current_invalid_features_group_table, fixed_path / f"data_{group_id}.parquet" + ) + group_id += 1 + current_invalid_features_group_path = invalid_path / f"group={group_id}" + + self._delete_directories(invalid_path.parent, ["invalid"]) + + return self.connection.sql(f""" + SELECT * EXCLUDE (geometry_wkb), ST_GeomFromWKB(geometry_wkb) geometry + FROM read_parquet('{file_path}/**') + """) + + def _concatenate_results_to_geoparquet( + self, + parsed_data: ParsedOSMFeatures, + tmp_dir_name: str, + save_file_path: Path, + explode_tags: bool, + ) -> None: + select_clauses = [ + *self._generate_osm_tags_sql_select(parsed_data, explode_tags), + "geometry", + ] + + node_select_clauses = ["'node/' || id as feature_id", *select_clauses] + way_select_clauses = ["'way/' || id as feature_id", *select_clauses] + relation_select_clauses = ["'relation/' || id as feature_id", *select_clauses] + + unioned_features = self.connection.sql(f""" + SELECT {', '.join(node_select_clauses)} + FROM ({parsed_data.nodes.sql_query()}) n + UNION ALL + SELECT {', '.join(way_select_clauses)} + FROM ({parsed_data.ways.sql_query()}) w + UNION ALL + SELECT {', '.join(relation_select_clauses)} + FROM ({parsed_data.relations.sql_query()}) r + """) + + grouped_features = self._parse_features_relation_to_groups(unioned_features, explode_tags) + + valid_features_full_relation = self.connection.sql(f""" + SELECT * FROM ({grouped_features.sql_query()}) + WHERE ST_IsValid(geometry) + """) + + valid_features_parquet_path = Path(tmp_dir_name) / "osm_valid_elements" + valid_features_parquet_relation = self._save_parquet_file_with_geometry( + valid_features_full_relation, + valid_features_parquet_path, + ) + + valid_features_parquet_table = pq.read_table(valid_features_parquet_path) + + is_empty = valid_features_parquet_table.num_rows == 0 + + if not is_empty: + geometry_column = ga.as_wkb( + ga.with_crs(valid_features_parquet_table.column("geometry_wkb"), WGS84_CRS) + ) + else: + geometry_column = ga.as_wkb(gpd.GeoSeries([], crs=WGS84_CRS)) + + valid_features_parquet_table = valid_features_parquet_table.append_column( + GEOMETRY_COLUMN, geometry_column + ) + valid_features_parquet_table = valid_features_parquet_table.drop("geometry_wkb") + + parquet_tables = [valid_features_parquet_table] + + invalid_features_full_relation = self.connection.sql(f""" + SELECT * FROM ({grouped_features.sql_query()}) a + ANTI JOIN ({valid_features_parquet_relation.sql_query()}) b + ON a.feature_id = b.feature_id + """) + + total_nodes = parsed_data.nodes.count("id").fetchone()[0] + total_ways = parsed_data.ways.count("id").fetchone()[0] + total_relations = parsed_data.relations.count("id").fetchone()[0] + total_features = total_nodes + total_ways + total_relations + + valid_features = valid_features_parquet_relation.count("feature_id").fetchone()[0] + + invalid_features = total_features - valid_features + + if invalid_features > 0: + groups = 
floor(invalid_features / self.rows_per_bucket) + grouped_invalid_features_result_parquet = ( + Path(tmp_dir_name) / "osm_invalid_elements_grouped" + ) + self.connection.sql(f""" + COPY ( + SELECT + * EXCLUDE (geometry), ST_AsWKB(geometry) geometry_wkb, + floor( + row_number() OVER (ORDER BY feature_id) / {self.rows_per_bucket} + )::INTEGER as "group", + FROM ({invalid_features_full_relation.sql_query()}) + ) TO '{grouped_invalid_features_result_parquet}' + (FORMAT 'parquet', PARTITION_BY ("group"), ROW_GROUP_SIZE 25000) + """) + + for group in range(groups + 1): + current_invalid_features_group_path = ( + grouped_invalid_features_result_parquet / f"group={group}" + ) + current_invalid_features_group_table = pq.read_table( + current_invalid_features_group_path + ).drop("group") + valid_geometry_column = ga.as_wkb( + ga.as_geoarrow( + ga.to_geopandas( + ga.with_crs( + current_invalid_features_group_table.column("geometry_wkb"), + WGS84_CRS, + ) + ).make_valid() + ) + ) + + current_invalid_features_group_table = ( + current_invalid_features_group_table.append_column( + GEOMETRY_COLUMN, valid_geometry_column + ) + ) + current_invalid_features_group_table = current_invalid_features_group_table.drop( + "geometry_wkb" + ) + parquet_tables.append(current_invalid_features_group_table) + + joined_parquet_table: pa.Table = pa.concat_tables(parquet_tables) + + is_empty = joined_parquet_table.num_rows == 0 + + empty_columns = [] + for column_name in joined_parquet_table.column_names: + if column_name in (FEATURES_INDEX, GEOMETRY_COLUMN): + continue + if ( + is_empty + or pa.compute.all( + pa.compute.is_null(joined_parquet_table.column(column_name)) + ).as_py() + ): + empty_columns.append(column_name) + + if empty_columns: + joined_parquet_table = joined_parquet_table.drop(empty_columns) + + io.write_geoparquet_table( + joined_parquet_table, save_file_path, primary_geometry_column=GEOMETRY_COLUMN + ) + + def _generate_osm_tags_sql_select( + self, parsed_data: ParsedOSMFeatures, explode_tags: bool + ) -> list[str]: + """Prepare features filter clauses based on tags filter.""" + osm_tag_keys_select_clauses = [] + + # TODO: elif keep other tags + if not self.merged_tags_filter and not explode_tags: + osm_tag_keys_select_clauses = ["tags"] + elif not self.merged_tags_filter and explode_tags: + osm_tag_keys = set() + for elements in ( + parsed_data.nodes, + parsed_data.ways, + parsed_data.relations, + ): + found_tag_keys = [row[0] for row in self.connection.sql(f""" + SELECT DISTINCT UNNEST(map_keys(tags)) tag_key + FROM ({elements.sql_query()}) + """).fetchall()] + osm_tag_keys.update(found_tag_keys) + osm_tag_keys_select_clauses = [ + f"list_extract(map_extract(tags, '{osm_tag_key}'), 1) as \"{osm_tag_key}\"" + for osm_tag_key in sorted(list(osm_tag_keys)) + ] + elif self.merged_tags_filter and not explode_tags: + filter_tag_clauses = [] + for filter_tag_key, filter_tag_value in self.merged_tags_filter.items(): + if isinstance(filter_tag_value, bool) and filter_tag_value: + filter_tag_clauses.append(f"tag_entry.key = '{filter_tag_key}'") + elif isinstance(filter_tag_value, str): + escaped_value = self._sql_escape(filter_tag_value) + filter_tag_clauses.append( + f"(tag_entry.key = '{filter_tag_key}' AND tag_entry.value =" + f" '{escaped_value}')" + ) + elif isinstance(filter_tag_value, list) and filter_tag_value: + values_list = [f"'{self._sql_escape(value)}'" for value in filter_tag_value] + filter_tag_clauses.append( + f"(tag_entry.key = '{filter_tag_key}' AND tag_entry.value IN" + f" ({', 
'.join(values_list)}))" + ) + osm_tag_keys_select_clauses = [f""" + map_from_entries( + [ + tag_entry + for tag_entry in map_entries(tags) + if {" OR ".join(filter_tag_clauses)} + ] + ) as tags + """] + elif self.merged_tags_filter and explode_tags: + for filter_tag_key, filter_tag_value in self.merged_tags_filter.items(): + if isinstance(filter_tag_value, bool) and filter_tag_value: + osm_tag_keys_select_clauses.append( + f"list_extract(map_extract(tags, '{filter_tag_key}'), 1) as" + f' "{filter_tag_key}"' + ) + elif isinstance(filter_tag_value, str): + escaped_value = self._sql_escape(filter_tag_value) + osm_tag_keys_select_clauses.append(f""" + CASE WHEN list_extract( + map_extract(tags, '{filter_tag_key}'), 1 + ) = '{escaped_value}' + THEN '{escaped_value}' + ELSE NULL + END as "{filter_tag_key}" + """) + elif isinstance(filter_tag_value, list) and filter_tag_value: + values_list = [f"'{self._sql_escape(value)}'" for value in filter_tag_value] + osm_tag_keys_select_clauses.append(f""" + CASE WHEN list_extract( + map_extract(tags, '{filter_tag_key}'), 1 + ) IN ({', '.join(values_list)}) + THEN list_extract(map_extract(tags, '{filter_tag_key}'), 1) + ELSE NULL + END as "{filter_tag_key}" + """) + + if len(osm_tag_keys_select_clauses) > 100: + warnings.warn( + "Select clause contains more than 100 columns" + f" (found {len(osm_tag_keys_select_clauses)} columns)." + " Query might fail with insufficient memory resources." + " Consider applying more restrictive OsmTagsFilter for parsing.", + stacklevel=1, + ) + + return osm_tag_keys_select_clauses + + def _parse_features_relation_to_groups( + self, + features_relation: "duckdb.DuckDBPyRelation", + explode_tags: bool, + ) -> "duckdb.DuckDBPyRelation": + """ + Optionally group raw OSM features into groups defined in `GroupedOsmTagsFilter`. + + Creates new features based on definition from `GroupedOsmTagsFilter`. + Returns transformed DuckDB relation with columns based on group names from the filter. + Values are built by concatenation of matching tag key and value with + an equal sign (eg. amenity=parking). Since many tags can match a definition + of a single group, a first match is used as a feature value. + + Args: + features_relation (duckdb.DuckDBPyRelation): Generated features from the loader. + explode_tags (bool, optional): Whether to split tags into columns based on OSM tag keys. + Defaults to True. + + Returns: + duckdb.DuckDBPyRelation: Parsed features_relation. 
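+
+        Example (illustrative values):
+            With ``tags_filter={"amenities": {"amenity": ["bar", "cafe"]}}`` and
+            ``explode_tags=True``, a feature tagged ``amenity=cafe`` receives the
+            value ``amenity=cafe`` in an ``amenities`` column.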
+ """ + if not self.tags_filter or not is_expected_type(self.tags_filter, GroupedOsmTagsFilter): + return features_relation + + grouped_features_relation: "duckdb.DuckDBPyRelation" + grouped_tags_filter = cast(GroupedOsmTagsFilter, self.tags_filter) + + if explode_tags: + case_clauses = [] + for group_name in sorted(grouped_tags_filter.keys()): + osm_filter = grouped_tags_filter[group_name] + case_when_clauses = [] + for osm_tag_key, osm_tag_value in osm_filter.items(): + if isinstance(osm_tag_value, bool) and osm_tag_value: + case_when_clauses.append( + f"WHEN \"{osm_tag_key}\" IS NOT NULL THEN '{osm_tag_key}=' ||" + f' "{osm_tag_key}"' + ) + elif isinstance(osm_tag_value, str): + escaped_value = self._sql_escape(osm_tag_value) + case_when_clauses.append( + f"WHEN \"{osm_tag_key}\" = '{escaped_value}' THEN '{osm_tag_key}=' ||" + f' "{osm_tag_key}"' + ) + elif isinstance(osm_tag_value, list) and osm_tag_value: + values_list = [f"'{self._sql_escape(value)}'" for value in osm_tag_value] + case_when_clauses.append( + f"WHEN \"{osm_tag_key}\" IN ({', '.join(values_list)}) THEN" + f" '{osm_tag_key}=' || \"{osm_tag_key}\"" + ) + case_clause = f'CASE {" ".join(case_when_clauses)} END AS "{group_name}"' + case_clauses.append(case_clause) + + joined_case_clauses = ", ".join(case_clauses) + grouped_features_relation = self.connection.sql(f""" + SELECT feature_id, {joined_case_clauses}, geometry + FROM ({features_relation.sql_query()}) + """) + else: + case_clauses = [] + group_names = sorted(grouped_tags_filter.keys()) + for group_name in group_names: + osm_filter = grouped_tags_filter[group_name] + case_when_clauses = [] + for osm_tag_key, osm_tag_value in osm_filter.items(): + element_clause = f"element_at(tags, '{osm_tag_key}')[1]" + if isinstance(osm_tag_value, bool) and osm_tag_value: + case_when_clauses.append( + f"WHEN {element_clause} IS NOT NULL THEN '{osm_tag_key}=' ||" + f" {element_clause}" + ) + elif isinstance(osm_tag_value, str): + escaped_value = self._sql_escape(osm_tag_value) + case_when_clauses.append( + f"WHEN {element_clause} = '{escaped_value}' THEN '{osm_tag_key}=' ||" + f" {element_clause}" + ) + elif isinstance(osm_tag_value, list) and osm_tag_value: + values_list = [f"'{self._sql_escape(value)}'" for value in osm_tag_value] + case_when_clauses.append( + f"WHEN {element_clause} IN ({', '.join(values_list)}) THEN" + f" '{osm_tag_key}=' || {element_clause}" + ) + case_clause = f'CASE {" ".join(case_when_clauses)} END' + case_clauses.append(case_clause) + + group_names_as_sql_strings = [f"'{group_name}'" for group_name in group_names] + groups_map = ( + f"map([{', '.join(group_names_as_sql_strings)}], [{', '.join(case_clauses)}])" + ) + non_null_groups_map = f"""map_from_entries( + [ + tag_entry + for tag_entry in map_entries({groups_map}) + if tag_entry.value IS NOT NULL + ] + ) as tags""" + + grouped_features_relation = self.connection.sql(f""" + SELECT feature_id, {non_null_groups_map}, geometry + FROM ({features_relation.sql_query()}) + """) + + return grouped_features_relation diff --git a/tests/test_files/529cdcbb7a3cc103658ef31b39bed24984e421127d319c867edf2f86ff3bb098.osm.pbf b/tests/test_files/529cdcbb7a3cc103658ef31b39bed24984e421127d319c867edf2f86ff3bb098.osm.pbf new file mode 100644 index 0000000..6af4104 Binary files /dev/null and b/tests/test_files/529cdcbb7a3cc103658ef31b39bed24984e421127d319c867edf2f86ff3bb098.osm.pbf differ diff --git a/tests/test_files/7a0163cb721992d6219d486b3d29517d06aa0db19dd7be049f4f1fabf6146073.osm.pbf 
b/tests/test_files/7a0163cb721992d6219d486b3d29517d06aa0db19dd7be049f4f1fabf6146073.osm.pbf new file mode 100644 index 0000000..18d9571 Binary files /dev/null and b/tests/test_files/7a0163cb721992d6219d486b3d29517d06aa0db19dd7be049f4f1fabf6146073.osm.pbf differ diff --git a/tests/test_files/aa756ad3a961ba6d9da46c712b0d979d0c7d4768641ceea7409b287e2d18a48f.osm.pbf b/tests/test_files/aa756ad3a961ba6d9da46c712b0d979d0c7d4768641ceea7409b287e2d18a48f.osm.pbf new file mode 100644 index 0000000..b5e85ae Binary files /dev/null and b/tests/test_files/aa756ad3a961ba6d9da46c712b0d979d0c7d4768641ceea7409b287e2d18a48f.osm.pbf differ diff --git a/tests/test_files/d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf b/tests/test_files/d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf new file mode 100644 index 0000000..d3a1916 Binary files /dev/null and b/tests/test_files/d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf differ diff --git a/tests/test_files/eb2848d259345ce7dfe8af34fd1ab24503bb0b952e04e872c87c55550fa50fbf.osm.pbf b/tests/test_files/eb2848d259345ce7dfe8af34fd1ab24503bb0b952e04e872c87c55550fa50fbf.osm.pbf new file mode 100644 index 0000000..98c3ca6 Binary files /dev/null and b/tests/test_files/eb2848d259345ce7dfe8af34fd1ab24503bb0b952e04e872c87c55550fa50fbf.osm.pbf differ diff --git a/tests/test_files/monaco.osm.pbf b/tests/test_files/monaco.osm.pbf new file mode 100644 index 0000000..c6ab3c7 Binary files /dev/null and b/tests/test_files/monaco.osm.pbf differ diff --git a/tests/test_files/osmconf.ini b/tests/test_files/osmconf.ini new file mode 100644 index 0000000..227433b --- /dev/null +++ b/tests/test_files/osmconf.ini @@ -0,0 +1,132 @@ +# +# Configuration file for OSM import +# + +# put here the name of keys, or key=value, for ways that are assumed to be polygons if they are closed +# see http://wiki.openstreetmap.org/wiki/Map_Features +closed_ways_are_polygons=aeroway,amenity,boundary,building,building:part,craft,geological,historic,landuse,leisure,military,natural,office,place,shop,sport,tourism,highway=platform,public_transport=platform + +# Uncomment to avoid laundering of keys ( ':' turned into '_' ) +#attribute_name_laundering=no + +# Some tags, set on ways and when building multipolygons, multilinestrings or other_relations, +# are normally filtered out early, independent of the 'ignore' configuration below. +# Uncomment to disable early filtering. The 'ignore' lines below remain active. +#report_all_tags=yes + +# uncomment to report all nodes, including the ones without any (significant) tag +#report_all_nodes=yes + +# uncomment to report all ways, including the ones without any (significant) tag +#report_all_ways=yes + +# uncomment to specify the the format for the all_tags/other_tags field should be JSON +# instead of the default HSTORE formatting. 
+# Valid values for tags_format are "hstore" and "json" +tags_format=hstore + +[points] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,barrier,highway,ref,address,is_in,place,man_made +# keys that, alone, are not significant enough to report a node as a OGR point +unsignificant=created_by,converted_by,source,time,ele,attribution +# keys that should NOT be reported in the "other_tags" field +ignore=created_by,converted_by,source,time,ele,note,todo,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +[lines] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,highway,waterway,aerialway,barrier,man_made,railway + +# type of attribute 'foo' can be changed with something like +#foo_type=Integer/Real/String/DateTime + +# keys that should NOT be reported in the "other_tags" field +ignore=created_by,converted_by,source,time,ele,note,todo,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +#computed_attributes must appear before the keywords _type and _sql +computed_attributes=z_order +z_order_type=Integer +# Formula based on https://github.com/openstreetmap/osm2pgsql/blob/master/style.lua#L13 +# [foo] is substituted by value of tag foo. When substitution is not wished, the [ character can be escaped with \[ in literals +# Note for GDAL developers: if we change the below formula, make sure to edit ogrosmlayer.cpp since it has a hardcoded optimization for this very precise formula +z_order_sql="SELECT (CASE [highway] WHEN 'minor' THEN 3 WHEN 'road' THEN 3 WHEN 'unclassified' THEN 3 WHEN 'residential' THEN 3 WHEN 'tertiary_link' THEN 4 WHEN 'tertiary' THEN 4 WHEN 'secondary_link' THEN 6 WHEN 'secondary' THEN 6 WHEN 'primary_link' THEN 7 WHEN 'primary' THEN 7 WHEN 'trunk_link' THEN 8 WHEN 'trunk' THEN 8 WHEN 'motorway_link' THEN 9 WHEN 'motorway' THEN 9 ELSE 0 END) + (CASE WHEN [bridge] IN ('yes', 'true', '1') THEN 10 ELSE 0 END) + (CASE WHEN [tunnel] IN ('yes', 'true', '1') THEN -10 ELSE 0 END) + (CASE WHEN [railway] IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN [layer] IS NOT NULL THEN 10 * CAST([layer] AS INTEGER) ELSE 0 END)" + +[multipolygons] +# common attributes +# note: for multipolygons, osm_id=yes instantiates a osm_id field for the id of relations +# and a osm_way_id field for the id of closed ways. Both fields are exclusively set. +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type,aeroway,amenity,admin_level,barrier,boundary,building,craft,geological,historic,land_area,landuse,leisure,man_made,military,natural,office,place,shop,sport,tourism +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,todo,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. 
"all_tags" and "other_tags" are exclusive +all_tags=yes + +[multilinestrings] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,todo,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +[other_relations] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,todo,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes diff --git a/tests/test_pbf_file_reader.py b/tests/test_pbf_file_reader.py new file mode 100644 index 0000000..fbe8301 --- /dev/null +++ b/tests/test_pbf_file_reader.py @@ -0,0 +1,838 @@ +"""Tests for PbfFileReader.""" + +import platform +import re +import subprocess +import warnings +from collections.abc import Iterable +from pathlib import Path +from typing import Optional, Union, cast +from unittest import TestCase + +import duckdb +import geopandas as gpd +import pandas as pd +import pyogrio +import pytest +import six +from parametrization import Parametrization as P +from shapely import hausdorff_distance +from shapely.geometry import MultiPolygon, Polygon +from shapely.geometry.base import BaseGeometry +from shapely.ops import unary_union +from srai.geometry import remove_interiors +from srai.loaders.download import download_file +from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS, HEX2VEC_FILTER + +from quackosm._constants import FEATURES_INDEX +from quackosm._osm_tags_filters import OsmTagsFilter +from quackosm.pbf_file_reader import PbfFileReader + +ut = TestCase() +LFS_DIRECTORY_URL = "https://github.com/kraina-ai/srai-test-files/raw/main/files/" + + +@pytest.mark.parametrize( # type: ignore + "test_file_name,query,expected_result_length,expected_features_columns_length", + [ + ( + "d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf", + None, + 678, + 271, + ), + ( + "eb2848d259345ce7dfe8af34fd1ab24503bb0b952e04e872c87c55550fa50fbf.osm.pbf", + None, + 1, + 22, + ), + ("529cdcbb7a3cc103658ef31b39bed24984e421127d319c867edf2f86ff3bb098.osm.pbf", None, 0, 0), + ( + "d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf", + HEX2VEC_FILTER, + 97, + 10, + ), + ( + "eb2848d259345ce7dfe8af34fd1ab24503bb0b952e04e872c87c55550fa50fbf.osm.pbf", + HEX2VEC_FILTER, + 0, + 0, + ), + ( + "d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf", + GEOFABRIK_LAYERS, + 433, + 22, + ), + ( + "eb2848d259345ce7dfe8af34fd1ab24503bb0b952e04e872c87c55550fa50fbf.osm.pbf", + GEOFABRIK_LAYERS, + 0, + 0, + ), + ], +) +def test_pbf_reader( + test_file_name: str, + query: OsmTagsFilter, + expected_result_length: int, + expected_features_columns_length: int, +): + """Test proper files loading in `PbfFileReader`.""" + handler = PbfFileReader(tags_filter=query) + features_gdf = handler.get_features_gdf( + file_paths=[Path(__file__).parent / "test_files" / test_file_name], + explode_tags=True, + 
ignore_cache=True, + ) + assert ( + len(features_gdf) == expected_result_length + ), f"Mismatched result length ({len(features_gdf)}, {expected_result_length})" + assert len(features_gdf.columns) == expected_features_columns_length + 1, ( + f"Mismatched columns length ({len(features_gdf.columns)}," + f" {expected_features_columns_length + 1})" + ) + + +def test_pbf_reader_geometry_filtering(): # type: ignore + """Test proper spatial data filtering in `PbfFileReader`.""" + file_name = "d17f922ed15e9609013a6b895e1e7af2d49158f03586f2c675d17b760af3452e.osm.pbf" + handler = PbfFileReader( + tags_filter=HEX2VEC_FILTER, geometry_filter=Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]) + ) + features_gdf = handler.get_features_gdf( + file_paths=[Path(__file__).parent / "test_files" / file_name], + explode_tags=True, + ignore_cache=True, + ) + assert len(features_gdf) == 0 + + +# Copyright (C) 2011 by Hong Minhee , +# Robert Kajic +# Copyright (C) 2020 by Salesforce.com, Inc + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +def parse_hstore_tags(tags: str) -> dict[str, Optional[str]]: + """ + Parse hstore tags to python dict. + + This function has been copied from pghstore library + https://github.com/heroku/pghstore/blob/main/src/pghstore/_native.py + since it can't be installed on Windows. 
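+
+    Example (illustrative value):
+        '"name"=>"Monaco", "natural"=>"water"' is parsed into
+        {"name": "Monaco", "natural": "water"}.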
+ """ + ESCAPE_RE = re.compile(r"\\(.)") + + PAIR_RE = re.compile( + r'\s*(?:"(?P(?:[^\\"]|\\.)*)")\s*=>\s*' + r'(?:"(?P(?:[^\\"]|\\.)*)"|(?PNULL))' + r"\s*(?:(?P,)|$)", + re.IGNORECASE, + ) + + def _unescape(s: str) -> str: + return ESCAPE_RE.sub(r"\1", s) + + def _parse(string: str, encoding: str = "utf-8") -> Iterable[tuple[str, Optional[str]]]: + if isinstance(string, six.binary_type): + string = string.decode(encoding) + + string = string.strip() + offset = 0 + term_sep = None + for match in PAIR_RE.finditer(string): + if match.start() > offset: + raise ValueError("malformed hstore value: position %d" % offset) + + key = value = None + kq = match.group("kq") + if kq: + key = _unescape(kq) + + if key is None: + raise ValueError("Malformed hstore value starting at position %d" % offset) + + vq = match.group("vq") + if vq: + value = _unescape(vq) + elif match.group("vn"): + value = "" + else: + value = "" + + yield key, value + + term_sep = match.group("ts") + + offset = match.end() + + if len(string) > offset or term_sep: + raise ValueError("malformed hstore value: position %d" % offset) + + return dict(_parse(tags, encoding="utf-8")) + + +def transform_pbf_to_gpkg(extract_name: str, layer_name: str) -> Path: + """Uses GDAL ogr2ogr to transform PBF file into GPKG.""" + input_file = Path(__file__).parent / "files" / f"{extract_name}.osm.pbf" + output_file = Path(__file__).parent / "files" / f"{extract_name}_{layer_name}.gpkg" + config_file = Path(__file__).parent / "test_files" / "osmconf.ini" + args = [ + "ogr2ogr" if platform.system() != "Windows" else "ogr2ogr.exe", + str(output_file), + str(input_file), + layer_name, + "-oo", + f"CONFIG_FILE={config_file}", + ] + p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=-1) + _, err = p.communicate() + rc = p.returncode + if rc > 0: + raise RuntimeError(rc, err) + + return output_file + + +def read_features_with_pyogrio(extract_name: str) -> gpd.GeoDataFrame: + """Read features from *.osm.pbf file using pyogrio.""" + gdfs = [] + for layer_name in ("points", "lines", "multilinestrings", "multipolygons", "other_relations"): + gpkg_file_path = transform_pbf_to_gpkg(extract_name, layer_name) + gdf = pyogrio.read_dataframe(gpkg_file_path) + + if layer_name == "points": + gdf[FEATURES_INDEX] = "node/" + gdf["osm_id"] + elif layer_name == "lines": + gdf[FEATURES_INDEX] = "way/" + gdf["osm_id"] + elif layer_name in ("multilinestrings", "other_relations"): + gdf[FEATURES_INDEX] = "relation/" + gdf["osm_id"] + elif layer_name == "multipolygons": + gdf[FEATURES_INDEX] = gdf.apply( + lambda row: ( + "relation/" + row["osm_id"] + if row["osm_id"] is not None + else "way/" + row["osm_way_id"] + ), + axis=1, + ) + + gdfs.append(gdf) + + final_gdf = gpd.pd.concat(gdfs) + final_gdf = final_gdf[~final_gdf["all_tags"].isnull()] + final_gdf["tags"] = final_gdf["all_tags"].apply(parse_hstore_tags) + non_relations = ~final_gdf[FEATURES_INDEX].str.startswith("relation/") + relations = final_gdf[FEATURES_INDEX].str.startswith("relation/") + matching_relations = relations & final_gdf["tags"].apply( + lambda x: x.get("type") in ("boundary", "multipolygon") + ) + final_gdf = final_gdf[non_relations | matching_relations] + final_gdf.geometry = final_gdf.geometry.make_valid() + return final_gdf[[FEATURES_INDEX, "tags", "geometry"]].set_index(FEATURES_INDEX) + + +def check_if_relation_in_osm_is_valid_based_on_tags(pbf_file: str, relation_id: str) -> bool: + """Check if given relation in OSM is valid.""" + duckdb.load_extension("spatial") 
+ return cast( + bool, + duckdb.sql( + f"SELECT list_contains(ref_roles, 'outer') FROM ST_READOSM('{pbf_file}') " + "WHERE kind = 'relation' AND len(refs) > 0 AND list_contains(map_keys(tags), 'type') " + "AND list_has_any(map_extract(tags, 'type'), ['boundary', 'multipolygon']) " + f"AND id = {relation_id}" + ).fetchone()[0], + ) + + +def check_if_relation_in_osm_is_valid_based_on_geometry(pbf_file: str, relation_id: str) -> bool: + """ + Check if given relation in OSM is valid. + + Reconstructs full geometry for a single ID and check if there is at least one outer geometry. + Sometimes + """ + duckdb.load_extension("spatial") + return cast( + bool, + duckdb.sql(f""" + WITH required_relation AS ( + SELECT + r.id + FROM ST_ReadOsm('{pbf_file}') r + WHERE r.kind = 'relation' + AND len(r.refs) > 0 + AND list_contains(map_keys(r.tags), 'type') + AND list_has_any( + map_extract(r.tags, 'type'), + ['boundary', 'multipolygon'] + ) + AND r.id = {relation_id} + ), + unnested_relation_refs AS ( + SELECT + r.id, + UNNEST(refs) as ref, + UNNEST(ref_types) as ref_type, + UNNEST(ref_roles) as ref_role, + UNNEST(range(length(refs))) as ref_idx + FROM ST_ReadOsm('{pbf_file}') r + SEMI JOIN required_relation rr + ON r.id = rr.id + ), + unnested_relation_way_refs AS ( + SELECT id, ref, COALESCE(ref_role, 'outer') as ref_role, ref_idx + FROM unnested_relation_refs + WHERE ref_type = 'way' + ), + unnested_relations AS ( + SELECT + r.id, + COALESCE(r.ref_role, 'outer') as ref_role, + r.ref, + FROM unnested_relation_way_refs r + ORDER BY r.id, r.ref_idx + ), + unnested_way_refs AS ( + SELECT + w.id, + UNNEST(refs) as ref, + UNNEST(ref_types) as ref_type, + UNNEST(range(length(refs))) as ref_idx + FROM ST_ReadOsm('{pbf_file}') w + SEMI JOIN unnested_relation_way_refs urwr + ON urwr.ref = w.id + WHERE w.kind = 'way' + ), + nodes_geometries AS ( + SELECT + n.id, + ST_POINT(n.lon, n.lat) geom + FROM ST_ReadOsm('{pbf_file}') n + SEMI JOIN unnested_way_refs uwr + ON uwr.ref = n.id + WHERE n.kind = 'node' + ), + way_geometries AS ( + SELECT uwr.id, ST_MakeLine(LIST(n.geom ORDER BY ref_idx ASC)) linestring + FROM unnested_way_refs uwr + JOIN nodes_geometries n + ON uwr.ref = n.id + GROUP BY uwr.id + ), + any_outer_refs AS ( + SELECT id, bool_or(ref_role == 'outer') any_outer_refs + FROM unnested_relations + GROUP BY id + ), + relations_with_geometries AS ( + SELECT + x.id, + CASE WHEN aor.any_outer_refs + THEN x.ref_role ELSE 'outer' + END as ref_role, + x.geom geometry, + row_number() OVER (PARTITION BY x.id) as geometry_id + FROM ( + SELECT + unnested_relations.id, + unnested_relations.ref_role, + UNNEST( + ST_Dump(ST_LineMerge(ST_Collect(list(way_geometries.linestring)))), + recursive := true + ), + FROM unnested_relations + JOIN way_geometries ON way_geometries.id = unnested_relations.ref + GROUP BY unnested_relations.id, unnested_relations.ref_role + ) x + JOIN any_outer_refs aor ON aor.id = x.id + WHERE ST_NPoints(geom) >= 4 + ), + valid_relations AS ( + SELECT id, is_valid + FROM ( + SELECT + id, + bool_and( + ST_Equals(ST_StartPoint(geometry), ST_EndPoint(geometry)) + ) is_valid + FROM relations_with_geometries + WHERE ref_role = 'outer' + GROUP BY id + ) + WHERE is_valid = true + ) + SELECT COUNT(*) > 0 AS 'any_valid_outer_geometry' + FROM valid_relations + """).fetchone()[0], + ) + + +def get_tags_from_osm_element(pbf_file: str, feature_id: str) -> dict[str, str]: + """Check if given relation in OSM is valid.""" + duckdb.load_extension("spatial") + kind, osm_id = feature_id.split("/", 2) + raw_tags = 
+    raw_tags = duckdb.sql(
+        f"SELECT tags FROM ST_READOSM('{pbf_file}') WHERE kind = '{kind}' AND id = {osm_id}"
+    ).fetchone()[0]
+    return dict(zip(raw_tags["key"], raw_tags["value"]))
+
+
+def extract_polygons_from_geometry(geometry: BaseGeometry) -> list[Union[Polygon, MultiPolygon]]:
+    """Extract only Polygons and MultiPolygons from the geometry."""
+    polygon_geometries = []
+    if geometry.geom_type in ("Polygon", "MultiPolygon"):
+        polygon_geometries.append(geometry)
+    elif geometry.geom_type == "GeometryCollection":
+        polygon_geometries.extend(
+            sub_geom
+            for sub_geom in geometry.geoms
+            if sub_geom.geom_type in ("Polygon", "MultiPolygon")
+        )
+    return polygon_geometries
+
+
+@P.parameters("extract_name")  # type: ignore
+@P.case("Bahamas", "bahamas")  # type: ignore
+@P.case("Cambodia", "cambodia")  # type: ignore
+@P.case("Cyprus", "cyprus")  # type: ignore
+@P.case("El Salvador", "el-salvador")  # type: ignore
+@P.case("Fiji", "fiji")  # type: ignore
+@P.case("Greenland", "greenland")  # type: ignore
+@P.case("Kiribati", "kiribati")  # type: ignore
+@P.case("Maldives", "maldives")  # type: ignore
+@P.case("Mauritius", "mauritius")  # type: ignore
+@P.case("Monaco", "monaco")  # type: ignore
+@P.case("Panama", "panama")  # type: ignore
+@P.case("Seychelles", "seychelles")  # type: ignore
+@P.case("Sierra Leone", "sierra-leone")  # type: ignore
+def test_gdal_parity(extract_name: str) -> None:
+    """Test if loaded data is similar to GDAL results."""
+    pbf_file_download_url = LFS_DIRECTORY_URL + f"{extract_name}-latest.osm.pbf"
+    pbf_file_path = Path(__file__).parent / "files" / f"{extract_name}.osm.pbf"
+    download_file(pbf_file_download_url, str(pbf_file_path), force_download=True)
+
+    reader = PbfFileReader()
+    duckdb_gdf = reader.get_features_gdf([pbf_file_path], explode_tags=False, ignore_cache=True)
+    gdal_gdf = read_features_with_pyogrio(extract_name)
+
+    gdal_index = gdal_gdf.index
+    duckdb_index = duckdb_gdf.index
+
+    missing_in_duckdb = gdal_index.difference(duckdb_index)
+    # Get missing non-relation features with at least one non-area tag value
+    non_relations_missing_in_duckdb = [
+        feature_id
+        for feature_id in missing_in_duckdb
+        if not feature_id.startswith("relation/")
+        and any(True for k in gdal_gdf.loc[feature_id].tags.keys() if k != "area")
+    ]
+    valid_relations_missing_in_duckdb = [
+        feature_id
+        for feature_id in missing_in_duckdb
+        if feature_id.startswith("relation/")
+        and check_if_relation_in_osm_is_valid_based_on_tags(
+            str(pbf_file_path), feature_id.replace("relation/", "")
+        )
+        and check_if_relation_in_osm_is_valid_based_on_geometry(
+            str(pbf_file_path), feature_id.replace("relation/", "")
+        )
+    ]
+
+    invalid_relations_missing_in_duckdb = missing_in_duckdb.difference(
+        non_relations_missing_in_duckdb
+    ).difference(valid_relations_missing_in_duckdb)
+
+    assert (
+        not non_relations_missing_in_duckdb
+    ), f"Missing non relation features in PbfFileReader ({non_relations_missing_in_duckdb})"
+
+    assert (
+        not valid_relations_missing_in_duckdb
+    ), f"Missing valid relation features in PbfFileReader ({valid_relations_missing_in_duckdb})"
+
+    if len(invalid_relations_missing_in_duckdb) > 0:
+        warnings.warn(
+            "Invalid relations exist in OSM GDAL data extract"
+            f" ({invalid_relations_missing_in_duckdb})",
+            stacklevel=1,
+        )
+
+    invalid_features = []
+
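+    # Features present in both outputs are compared pairwise below: first the tag
+    # dictionaries, then the geometries. The "area" tag is excluded from the tag
+    # comparison, since it only determines whether a closed way is rendered as a
+    # polygon or kept as a line.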
+    common_index = gdal_index.difference(invalid_relations_missing_in_duckdb)
+    joined_df = pd.DataFrame(
+        dict(
+            duckdb_tags=duckdb_gdf.loc[common_index].tags,
+            source_tags=gdal_gdf.loc[common_index].tags,
+            duckdb_geometry=duckdb_gdf.loc[common_index].geometry,
+            gdal_geometry=gdal_gdf.loc[common_index].geometry,
+        ),
+        index=common_index,
+    )
+
+    # Check tags
+    joined_df["tags_keys_difference"] = joined_df.apply(
+        lambda x: set(x.duckdb_tags.keys())
+        .symmetric_difference(x.source_tags.keys())
+        .difference(["area"]),
+        axis=1,
+    )
+
+    # If there is a difference, compare tags with the source data.
+    # Sometimes GDAL copies tags from members to a parent.
+    mismatched_rows = joined_df["tags_keys_difference"].str.len() != 0
+    if mismatched_rows.any():
+        joined_df.loc[mismatched_rows, "source_tags"] = [
+            get_tags_from_osm_element(str(pbf_file_path), row_index)
+            for row_index in joined_df.loc[mismatched_rows].index
+        ]
+
+        joined_df.loc[mismatched_rows, "tags_keys_difference"] = joined_df.loc[
+            mismatched_rows
+        ].apply(
+            lambda x: set(x.duckdb_tags.keys())
+            .symmetric_difference(x.source_tags.keys())
+            .difference(["area"]),
+            axis=1,
+        )
+
+    for row_index in common_index:
+        tags_keys_difference = joined_df.loc[row_index, "tags_keys_difference"]
+        duckdb_tags = joined_df.loc[row_index, "duckdb_tags"]
+        source_tags = joined_df.loc[row_index, "source_tags"]
+        assert not tags_keys_difference, (
+            f"Tags keys aren't equal. ({row_index}, {tags_keys_difference},"
+            f" {duckdb_tags.keys()}, {source_tags.keys()})"
+        )
+        ut.assertDictEqual(
+            {k: v for k, v in duckdb_tags.items() if k != "area"},
+            {k: v for k, v in source_tags.items() if k != "area"},
+            f"Tags aren't equal. ({row_index})",
+        )
+
+    invalid_geometries_df = joined_df
+
+    # Check if both geometries are closed or open
+    invalid_geometries_df["duckdb_is_closed"] = invalid_geometries_df["duckdb_geometry"].apply(
+        lambda x: x.is_closed
+    )
+    invalid_geometries_df["gdal_is_closed"] = invalid_geometries_df["gdal_geometry"].apply(
+        lambda x: x.is_closed
+    )
+    invalid_geometries_df["geometry_both_closed_or_not"] = (
+        invalid_geometries_df["duckdb_is_closed"] == invalid_geometries_df["gdal_is_closed"]
+    )
+
+    tolerance = 0.5 * 10 ** (-6)
+    # Check if geometries are almost equal - same geom type, same points
+    invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_both_closed_or_not"], "geometry_almost_equals"
+    ] = gpd.GeoSeries(
+        invalid_geometries_df.loc[
+            invalid_geometries_df["geometry_both_closed_or_not"], "duckdb_geometry"
+        ]
+    ).geom_equals_exact(
+        gpd.GeoSeries(
+            invalid_geometries_df.loc[
+                invalid_geometries_df["geometry_both_closed_or_not"], "gdal_geometry"
+            ]
+        ),
+        tolerance=tolerance,
+    )
+    invalid_geometries_df = invalid_geometries_df.loc[
+        ~(
+            invalid_geometries_df["geometry_both_closed_or_not"]
+            & invalid_geometries_df["geometry_almost_equals"]
+        )
+    ]
+    if invalid_geometries_df.empty:
+        return
+
+    # Check geometries equality - same geom type, same points
+    invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_both_closed_or_not"], "geometry_equals"
+    ] = gpd.GeoSeries(
+        invalid_geometries_df.loc[
+            invalid_geometries_df["geometry_both_closed_or_not"], "duckdb_geometry"
+        ]
+    ).geom_equals(
+        gpd.GeoSeries(
+            invalid_geometries_df.loc[
+                invalid_geometries_df["geometry_both_closed_or_not"], "gdal_geometry"
+            ]
+        )
+    )
+    invalid_geometries_df = invalid_geometries_df.loc[
+        ~(
+            invalid_geometries_df["geometry_both_closed_or_not"]
+            & invalid_geometries_df["geometry_equals"]
+        )
+    ]
+    if invalid_geometries_df.empty:
+        return
+
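+    # Next, polygonal geometries are compared with IoU (intersection over union):
+    # intersection_area / (duckdb_area + gdal_area - intersection_area); values within
+    # `tolerance` of 1 are treated as a match.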
+    # Check geometries overlap if polygons - slight misalignment between points,
+    # but marginal
+    matching_polygon_geometries_mask = (
+        invalid_geometries_df["geometry_both_closed_or_not"]
+        & gpd.GeoSeries(invalid_geometries_df["duckdb_geometry"]).geom_type.isin(
+            ("Polygon", "MultiPolygon", "GeometryCollection")
+        )
+        & gpd.GeoSeries(invalid_geometries_df["gdal_geometry"]).geom_type.isin(
+            ("Polygon", "MultiPolygon", "GeometryCollection")
+        )
+    )
+    invalid_geometries_df.loc[matching_polygon_geometries_mask, "geometry_intersection_area"] = (
+        gpd.GeoSeries(
+            invalid_geometries_df.loc[matching_polygon_geometries_mask, "duckdb_geometry"]
+        )
+        .intersection(
+            gpd.GeoSeries(
+                invalid_geometries_df.loc[matching_polygon_geometries_mask, "gdal_geometry"]
+            )
+        )
+        .area
+    )
+
+    invalid_geometries_df.loc[
+        matching_polygon_geometries_mask, "iou_metric"
+    ] = invalid_geometries_df.loc[
+        matching_polygon_geometries_mask, "geometry_intersection_area"
+    ] / (
+        gpd.GeoSeries(
+            invalid_geometries_df.loc[matching_polygon_geometries_mask, "duckdb_geometry"]
+        ).area
+        + gpd.GeoSeries(
+            invalid_geometries_df.loc[matching_polygon_geometries_mask, "gdal_geometry"]
+        ).area
+        - invalid_geometries_df.loc[matching_polygon_geometries_mask, "geometry_intersection_area"]
+    )
+
+    invalid_geometries_df.loc[matching_polygon_geometries_mask, "geometry_iou_near_one"] = (
+        invalid_geometries_df.loc[matching_polygon_geometries_mask, "iou_metric"] >= (1 - tolerance)
+    )
+    invalid_geometries_df = invalid_geometries_df.loc[
+        ~(matching_polygon_geometries_mask & invalid_geometries_df["geometry_iou_near_one"])
+    ]
+    if invalid_geometries_df.empty:
+        return
+
+    # Check if points lie near each other - regardless of geometry type
+    # (Polygon vs LineString)
+    invalid_geometries_df["hausdorff_distance_value"] = invalid_geometries_df.apply(
+        lambda x: hausdorff_distance(x.duckdb_geometry, x.gdal_geometry, densify=0.5), axis=1
+    )
+    invalid_geometries_df["geometry_close_hausdorff_distance"] = (
+        invalid_geometries_df["hausdorff_distance_value"] < 1e-10
+    )
+
+    # Check if GDAL geometry is a linestring while DuckDB geometry is a polygon
+    invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_close_hausdorff_distance"],
+        "is_duckdb_polygon_and_gdal_linestring",
+    ] = invalid_geometries_df.loc[invalid_geometries_df["geometry_close_hausdorff_distance"]].apply(
+        lambda x: x.duckdb_geometry.geom_type
+        in (
+            "Polygon",
+            "MultiPolygon",
+        )
+        and x.gdal_geometry.geom_type in ("LineString", "MultiLineString"),
+        axis=1,
+    )
+
+    # Check if DuckDB geometry can be a polygon and not a linestring
+    # based on features config
+    invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_close_hausdorff_distance"]
+        & invalid_geometries_df["is_duckdb_polygon_and_gdal_linestring"],
+        "is_proper_filter_tag_value",
+    ] = invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_close_hausdorff_distance"]
+        & invalid_geometries_df["is_duckdb_polygon_and_gdal_linestring"],
+        "duckdb_tags",
+    ].apply(
+        lambda x: any(
+            (tag in reader.osm_way_polygon_features_config.all)
+            or (
+                tag in reader.osm_way_polygon_features_config.allowlist
+                and value in reader.osm_way_polygon_features_config.allowlist[tag]
+            )
+            or (
+                tag in reader.osm_way_polygon_features_config.denylist
+                and value not in reader.osm_way_polygon_features_config.denylist[tag]
+            )
+            for tag, value in x.items()
+        )
+    )
+
+    invalid_geometries_df = invalid_geometries_df.loc[
+        ~(
+            invalid_geometries_df["geometry_close_hausdorff_distance"]
+            & invalid_geometries_df["is_duckdb_polygon_and_gdal_linestring"]
+            & invalid_geometries_df["is_proper_filter_tag_value"]
+        )
+    ]
+    if invalid_geometries_df.empty:
+        return
+
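+    # The same osm_way_polygon_features_config drives both directions of this check:
+    # a tag listed in `all` always marks a closed way as a polygon, an allowlisted tag
+    # does so only for the listed values, and a denylisted tag for every value except
+    # the listed ones.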
+    # Check if GDAL geometry is a polygon while DuckDB geometry is a linestring
+    invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_close_hausdorff_distance"],
+        "is_duckdb_linestring_and_gdal_polygon",
+    ] = invalid_geometries_df.loc[invalid_geometries_df["geometry_close_hausdorff_distance"]].apply(
+        lambda x: x.duckdb_geometry.geom_type
+        in (
+            "LineString",
+            "MultiLineString",
+        )
+        and x.gdal_geometry.geom_type in ("Polygon", "MultiPolygon"),
+        axis=1,
+    )
+
+    # Check if DuckDB geometry should be a linestring and not a polygon
+    # based on features config
+    invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_close_hausdorff_distance"]
+        & invalid_geometries_df["is_duckdb_linestring_and_gdal_polygon"],
+        "is_not_in_filter_tag_value",
+    ] = invalid_geometries_df.loc[
+        invalid_geometries_df["geometry_close_hausdorff_distance"]
+        & invalid_geometries_df["is_duckdb_linestring_and_gdal_polygon"],
+        "duckdb_tags",
+    ].apply(
+        lambda x: any(
+            (tag not in reader.osm_way_polygon_features_config.all)
+            and (
+                tag not in reader.osm_way_polygon_features_config.allowlist
+                or (
+                    tag in reader.osm_way_polygon_features_config.allowlist
+                    and value not in reader.osm_way_polygon_features_config.allowlist[tag]
+                )
+            )
+            and (
+                tag not in reader.osm_way_polygon_features_config.denylist
+                or (
+                    tag in reader.osm_way_polygon_features_config.denylist
+                    and value in reader.osm_way_polygon_features_config.denylist[tag]
+                )
+            )
+            for tag, value in x.items()
+        )
+    )
+
+    invalid_geometries_df = invalid_geometries_df.loc[
+        ~(
+            invalid_geometries_df["geometry_close_hausdorff_distance"]
+            & invalid_geometries_df["is_duckdb_linestring_and_gdal_polygon"]
+            & invalid_geometries_df["is_not_in_filter_tag_value"]
+        )
+    ]
+    if invalid_geometries_df.empty:
+        return
+
+    # Sometimes GDAL parses geometries incorrectly because of errors in OSM data
+    # Examples of errors:
+    # - overlapping inner ring with outer ring
+    # - intersecting outer rings
+    # - intersecting inner rings
+    # - inner ring outside outer geometry
+    # If we detect that the difference between those geometries
+    # lies inside the exterior of the geometry, we can assume that the OSM geometry
+    # is improperly defined.
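+    # To detect this, interiors are stripped from the unioned polygons on both sides
+    # and the pair is accepted when either resulting shell fully covers the other
+    # (i.e. the difference is only one-sided).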
+    invalid_geometries_df["duckdb_unioned_geometry_without_holes"] = invalid_geometries_df[
+        "duckdb_geometry"
+    ].apply(
+        lambda x: (
+            remove_interiors(unary_union(polygons))
+            if len(polygons := extract_polygons_from_geometry(x)) > 0
+            else None
+        )
+    )
+    invalid_geometries_df["gdal_unioned_geometry_without_holes"] = invalid_geometries_df[
+        "gdal_geometry"
+    ].apply(
+        lambda x: (
+            remove_interiors(unary_union(polygons))
+            if len(polygons := extract_polygons_from_geometry(x)) > 0
+            else None
+        )
+    )
+    invalid_geometries_df["both_polygon_geometries"] = (
+        ~pd.isna(invalid_geometries_df["duckdb_unioned_geometry_without_holes"])
+    ) & (~pd.isna(invalid_geometries_df["gdal_unioned_geometry_without_holes"]))
+
+    # Check that the difference doesn't extend both geometries;
+    # only a one-sided difference can be accepted
+    invalid_geometries_df.loc[
+        invalid_geometries_df["both_polygon_geometries"], "duckdb_geometry_fully_covered_by_gdal"
+    ] = gpd.GeoSeries(
+        invalid_geometries_df.loc[
+            invalid_geometries_df["both_polygon_geometries"],
+            "duckdb_unioned_geometry_without_holes",
+        ]
+    ).covered_by(
+        gpd.GeoSeries(
+            invalid_geometries_df.loc[
+                invalid_geometries_df["both_polygon_geometries"],
+                "gdal_unioned_geometry_without_holes",
+            ]
+        )
+    )
+
+    invalid_geometries_df.loc[
+        invalid_geometries_df["both_polygon_geometries"], "gdal_geometry_fully_covered_by_duckdb"
+    ] = gpd.GeoSeries(
+        invalid_geometries_df.loc[
+            invalid_geometries_df["both_polygon_geometries"], "gdal_unioned_geometry_without_holes"
+        ]
+    ).covered_by(
+        gpd.GeoSeries(
+            invalid_geometries_df.loc[
+                invalid_geometries_df["both_polygon_geometries"],
+                "duckdb_unioned_geometry_without_holes",
+            ]
+        )
+    )
+
+    invalid_geometries_df = invalid_geometries_df.loc[
+        ~(
+            invalid_geometries_df["duckdb_geometry_fully_covered_by_gdal"]
+            | invalid_geometries_df["gdal_geometry_fully_covered_by_duckdb"]
+        )
+    ]
+    if invalid_geometries_df.empty:
+        return
+
+    invalid_features = (
+        invalid_geometries_df.drop(
+            columns=["duckdb_tags", "source_tags", "duckdb_geometry", "gdal_geometry"]
+        )
+        .reset_index()
+        .to_dict(orient="records")
+    )
+
+    assert not invalid_features, (
+        f"Geometries aren't equal - ({[t[FEATURES_INDEX] for t in invalid_features]}). Full debug"
+        f" output: ({invalid_features})"
+    )
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..cd42262
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,17 @@
+[tox]
+envlist =
+    python3.9
+    python3.10
+    python3.11
+isolated_build = True
+skip_missing_interpreters = True
+
+[testenv]
+groups =
+    test
+deps = coverage
+commands =
+    coverage run --source=quackosm -m pytest -v --durations=20 tests
+    coverage xml -o coverage.{envname}.xml
+    coverage report -m
+skip_install = true