Skip to content

Commit

Permalink
Merge pull request #372 from HLasse/added-pyupgrade
Browse files Browse the repository at this point in the history
ci: Added pyupgrade to ruff
  • Loading branch information
HLasse authored Dec 16, 2024
2 parents bb07e1b + 24938bb commit a44ec46
Show file tree
Hide file tree
Showing 30 changed files with 166 additions and 274 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# GitHub action to run linting

name: run-linting

# Triggered by pushes to main and by every pull request.
on:
  push:
    branches: [main]
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true

      - name: Set up Python
        run: uv python install 3.9

      - name: Install dependencies
        run: make install

      # Delegates to the makefile's `lint-check` target.
      - name: Lint
        id: lint
        run: |
          make lint-check
45 changes: 0 additions & 45 deletions .pre-commit-config.yaml

This file was deleted.

1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
from __future__ import annotations

from textdescriptives.about import __version__

Expand Down
4 changes: 3 additions & 1 deletion docs/tutorials/filter_corpus_using_quality.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
"from datasets import load_dataset\n",
"\n",
"# stream in the dataset\n",
"dataset = load_dataset(\"mc4\", \"en\", streaming=True, split=\"train\", trust_remote_code=True)\n",
"dataset = load_dataset(\n",
" \"mc4\", \"en\", streaming=True, split=\"train\", trust_remote_code=True\n",
")\n",
"\n",
"# download the first 1 000\n",
"dataset = dataset.take(1000)\n",
Expand Down
13 changes: 7 additions & 6 deletions docs/tutorials/sklearn_integration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
],
"source": [
"from textdescriptives.utils import load_sms_data\n",
"\n",
"df = load_sms_data()\n",
"df.head()"
]
Expand All @@ -152,7 +153,7 @@
"# to textdescriptives.extract_metrics\n",
"descriptive_stats_extractor = TextDescriptivesFeaturizer(\n",
" lang=\"en\", metrics=[\"descriptive_stats\"]\n",
" )"
")"
]
},
{
Expand Down Expand Up @@ -184,7 +185,7 @@
"from sklearn.pipeline import Pipeline\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn import set_config\n",
"\n",
Expand All @@ -197,10 +198,9 @@
" (\n",
" \"featurizer\",\n",
" ColumnTransformer(\n",
" [(\"text_processing\", descriptive_stats_extractor, \"message\")]\n",
" ,\n",
" # removes the `text_processing__` prefix from feature names\n",
" verbose_feature_names_out=False, \n",
" [(\"text_processing\", descriptive_stats_extractor, \"message\")],\n",
" # removes the `text_processing__` prefix from feature names\n",
" verbose_feature_names_out=False,\n",
" ),\n",
" ),\n",
" (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
Expand Down Expand Up @@ -366,6 +366,7 @@
],
"source": [
"import pandas as pd\n",
"\n",
"# extract feature importances\n",
"feature_importance_mapping = list(\n",
" zip(\n",
Expand Down
14 changes: 8 additions & 6 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
install:
@echo "--- 🚀 Installing project ---"
uv sync --extra docs --extra tests --extra style --extra style
uv sync --extra docs --extra tests --extra style
uv pip install pip
uv pip install -r tests/requirements.txt

lint:
@echo "--- 🧹 Running linters ---"
ruff format . # running ruff formatting
ruff check **/*.py --fix # running ruff linting
uv run ruff format . # running ruff formatting
uv run ruff check **/*.py --fix # running ruff linting

lint-check:
@echo "--- 🧹 Check is project is linted ---"
ruff format . --check # running ruff formatting
ruff check **/*.py # running ruff linting
uv run ruff format . --check # running ruff formatting
uv run ruff check **/*.py # running ruff linting

test:
@echo "--- 🧪 Running tests ---"
make install
pytest tests/
pytest tests/ -n auto

build-docs:
@echo "--- 📚 Building docs ---"
Expand Down
17 changes: 15 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ classifiers = [
"Operating System :: Microsoft :: Windows",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]

keywords = [
Expand Down Expand Up @@ -49,8 +53,8 @@ repository = "https://github.com/HLasse/textdescriptives"
documentation = "https://hlasse.github.io/TextDescriptives/"

[project.optional-dependencies]
style = ["black==24.1.1", "pre-commit==3.6.0", "ruff==0.1.15", "mypy==1.8.0"]
tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0"]
style = ["ruff==0.8.3"]
tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0", "pytest-xdist"]
docs = [
"pydantic==2.1",
"sphinx>=5.3.0",
Expand Down Expand Up @@ -115,3 +119,12 @@ build_command = "python -m pip install build; python -m build"

[tool.ruff]
exclude = [".venv", ".env", ".git", "__pycache__"]

[tool.ruff.lint]
select = ["UP", "I"]

[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.pydocstyle]
convention = "google"
2 changes: 1 addition & 1 deletion src/textdescriptives/about.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" About textdescriptives, version number is specified in the setup.cfg
"""About textdescriptives, version number is specified in the setup.cfg
file."""

# if python >= 3.8, use importlib.metadata otherwise use pkg_resources
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/dependency_distance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of statistics related to dependency distance."""
"""Calculation of statistics related to dependency distance."""

from typing import Callable

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/descriptive_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of descriptive statistics."""
"""Calculation of descriptive statistics."""

from typing import Callable, Dict, Union

Expand Down
7 changes: 5 additions & 2 deletions src/textdescriptives/components/information_theory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculate the entropy and perplexity of a corpus."""
"""Calculate the entropy and perplexity of a corpus."""

from typing import Callable, Dict, Union

Expand Down Expand Up @@ -64,7 +64,10 @@ def per_word_perplexity_getter(doc: Union[Doc, Span]) -> float:
else:
perplexity = perplexity_getter(doc)

return perplexity / len(doc)
len_doc = len(doc)
if len_doc:
return perplexity / len(doc)
return np.nan


def set_docspan_extension(
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/pos_proportions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of statistics that require a pos-tagger in the pipeline."""
"""Calculation of statistics that require a pos-tagger in the pipeline."""

from typing import Callable, Counter, List, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/quality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Component for calculating quality metrics."""
"""Component for calculating quality metrics."""

from collections import Counter, defaultdict
from typing import Callable, Dict, List, Mapping, Optional, Tuple, Union
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/quality_data_classes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Data classes used for the quality component."""
"""Data classes used for the quality component."""

from typing import Any, Dict, Optional, Tuple, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/readability.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of various readability metrics."""
"""Calculation of various readability metrics."""

from typing import Callable, Dict

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Utility functions for calculating various text descriptives."""
"""Utility functions for calculating various text descriptives."""

from typing import Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/extractors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Extract metrics as Pandas DataFrame."""
"""Extract metrics as Pandas DataFrame."""

from typing import Any, Dict, Iterable, List, Optional, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/load_components.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Adds all components to a spaCy pipeline."""
"""Adds all components to a spaCy pipeline."""

from spacy.language import Language
from spacy.tokens import Doc
Expand Down
4 changes: 3 additions & 1 deletion tests/books.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
""" These books and several tests are borrowed from
"""These books and several tests are borrowed from
https://github.com/mholtzscher/spacy_readability."""

from __future__ import annotations

oliver_twist = """Among other public buildings in a certain town, which for many reasons
it will be prudent to refrain from mentioning, and to which I will
assign no fictitious name, there is one anciently common to most towns,
Expand Down
11 changes: 7 additions & 4 deletions tests/test_coherence.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from __future__ import annotations

import warnings

import numpy as np
import pytest
import spacy

import textdescriptives as td # noqa: F401
import warnings


@pytest.fixture(scope="function")
def nlp():
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("textdescriptives/coherence")
return nlp
nlp_en = spacy.load("en_core_web_sm")
nlp_en.add_pipe("textdescriptives/coherence")
return nlp_en


def test_coherence_integration(nlp):
Expand Down
4 changes: 3 additions & 1 deletion tests/test_dependency_distance.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import ftfy
import numpy as np
import pytest
import spacy
import textdescriptives as td # noqa: F401

import textdescriptives as td # noqa: F401

from .books import flatland, oliver_twist, secret_garden

Expand Down
6 changes: 4 additions & 2 deletions tests/test_descriptive_stats.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from __future__ import annotations

import warnings

import ftfy
import pytest
from spacy.lang.en import English
Expand All @@ -6,8 +10,6 @@

from .books import flatland, oliver_twist, secret_garden

import warnings


@pytest.fixture(scope="function")
def nlp():
Expand Down
2 changes: 2 additions & 0 deletions tests/test_extractors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest
import spacy

Expand Down
2 changes: 2 additions & 0 deletions tests/test_information.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import numpy as np
import pytest
import spacy
Expand Down
3 changes: 3 additions & 0 deletions tests/test_load_components.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

import pytest
import spacy

import textdescriptives as td # noqa: F401


Expand Down
2 changes: 2 additions & 0 deletions tests/test_pos_proportions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest
import spacy
from spacy.tokens import Doc
Expand Down
Loading

0 comments on commit a44ec46

Please sign in to comment.