Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise API (BREAKING CHANGE) #99

Merged
merged 3 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Historic and pre-release versions aren't necessarily included.

### Changed

- *BREAKING CHANGES:*
- `get_profile_info()` returns`ProfileInfo` `NamedTuple` instead of dict
- `get_manager_member_counts()` returns `ManagerMemberCounts` `NamedTuple`
instead of dict
- `club_manager_url_via_login()` renamed to `manager_url_via_login()`
- `club_profile_url()` renamed to `profile_url()`
- Only get logger once, rather than for every log message
- Docs: docstring simplification, improvements
- Dev dependencies: add pdoc
Expand Down
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ See also https://playwright.dev/python/docs/browsers#install-system-dependencies
### Get info from a club's profile

```python
britishcycling_clubs.get_profile_info(club_id="123")
from britishcycling_clubs import get_profile_info
get_profile_info(club_id="123")
```
Returns a dict with these keys and values:

Expand All @@ -56,13 +57,17 @@ It then retrieves and prints the club name and total member count.


### Construct club's profile URL

```python
britishcycling_clubs.club_profile_url(club_id="123")
from britishcycling_clubs import profile_url
profile_url(club_id="123")
```

### Get member counts from Club Manager

```python
britishcycling_clubs.get_manager_member_counts(
from britishcycling_clubs import get_manager_member_counts
get_manager_member_counts(
club_id="123",
username="USERNAME",
password="PASSWORD",
Expand All @@ -84,8 +89,10 @@ It then retrieves and prints the number of active, expired and new
club member counts from the club's Club Manager pages.

### Construct club's Club Manager URL (via login)

```python
britishcycling_clubs.club_manager_url_via_login(club_id=123)
from britishcycling_clubs import manager_url_via_login
manager_url_via_login(club_id="123")
```
Returns URL which redirects to Club Manager URL, via login if needed.

Expand Down
14 changes: 9 additions & 5 deletions britishcycling_clubs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
"""Module with functions to retrieve information about a club."""

from britishcycling_clubs.manager import (
club_manager_url_via_login,
ManagerMemberCounts,
get_manager_member_counts,
manager_url_via_login,
)
from britishcycling_clubs.profile import (
club_profile_url,
ProfileInfo,
get_profile_info,
profile_url,
)

__all__ = [
"club_manager_url_via_login",
"get_manager_member_counts",
"club_profile_url",
"profile_url",
"get_profile_info",
"ProfileInfo",
"manager_url_via_login",
"get_manager_member_counts",
"ManagerMemberCounts",
]
46 changes: 20 additions & 26 deletions britishcycling_clubs/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,30 @@
import logging
import time
from pprint import pformat
from typing import TYPE_CHECKING, TypedDict
from typing import NamedTuple

from playwright.sync_api import sync_playwright

if TYPE_CHECKING:
from typing_extensions import TypeGuard

_MANAGER_VIA_LOGIN_BASE_URL = "https://www.britishcycling.org.uk/uac/connect?success_url=/dashboard/club/membership?club_id="


class MemberCounts(TypedDict):
"""Return type for `get_manager_member_counts()` function."""
class ManagerMemberCounts(NamedTuple):
"""Returned by `get_manager_member_counts()` function."""

active: int
"""Value from 'Active Club Members' tab."""
new: int
"""Value from 'New Club Subscriptions' tab."""
expired: int
"""Value from 'Expired Club Members' tab."""
new: int
"""Value from 'New Club Subscriptions' tab."""


def get_manager_member_counts(
club_id: str,
username: str,
password: str,
manager_page_load_delay: int = 5,
) -> MemberCounts:
) -> ManagerMemberCounts:
"""Get number of active, new, expired members from the Club Manager page.

This is a slow operation (circa 10s), so get them all in one go.
Expand Down Expand Up @@ -71,7 +68,7 @@ def get_manager_member_counts(
page = browser.new_page()

# login page
page.goto(club_manager_url_via_login(club_id))
page.goto(manager_url_via_login(club_id))
page.locator("id=username2").fill(username)
page.locator("id=password2").fill(password)
page.locator("id=login_button").click()
Expand Down Expand Up @@ -99,7 +96,7 @@ def get_manager_member_counts(
return _process_manager_member_counts(raw_member_counts)


def club_manager_url_via_login(club_id: str) -> str:
def manager_url_via_login(club_id: str) -> str:
"""Return URL of club's Club Manager page.

Parameters
Expand All @@ -110,38 +107,35 @@ def club_manager_url_via_login(club_id: str) -> str:
return f"{_MANAGER_VIA_LOGIN_BASE_URL}{club_id}/"


def _process_manager_member_counts(member_counts: dict[str, str]) -> MemberCounts:
def _process_manager_member_counts(
counts: dict[str, str],
) -> ManagerMemberCounts:
"""Process raw values.

Values are blank if there aren't any members (although they appear as zeros
during page load); convert these to 0 and ensure all are ints.

Raise exception if zero 'active members' value.
"""
processed_member_counts = {
key: int(value) if value else 0 for key, value in member_counts.items()
processed_counts = {
key: int(value) if value else 0 for key, value in counts.items()
}
# Assume an error if zero 'active' value.
# 'active' appears to be the slowest value to populate.
# 'new' will often be genuinely zero; 'expired' could be genuinely zero
if processed_member_counts["active"] == 0:
if processed_counts["active"] == 0:
error_message = (
"Active member count was zero; assuming issue with data collection. "
f"{pformat(processed_member_counts)}. "
f"{pformat(processed_counts)}. "
"Consider increasing `manager_page_load_delay`."
)
raise ValueError(error_message)

if not _is_membercounts(processed_member_counts):
raise TypeError
return processed_member_counts


def _is_membercounts(val: object) -> TypeGuard[MemberCounts]:
"""Check return type."""
if isinstance(val, dict):
return all(isinstance(v, int) for v in val.values())
return False
return ManagerMemberCounts(
active=processed_counts["active"],
expired=processed_counts["expired"],
new=processed_counts["new"],
)


def _log_info(logger: logging.Logger, message: str, start_time: float) -> None:
Expand Down
22 changes: 11 additions & 11 deletions britishcycling_clubs/profile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Functions to get information from a club's profile page."""

from typing import TypedDict
from typing import NamedTuple

import requests
from bs4 import BeautifulSoup, Tag
Expand All @@ -9,8 +9,8 @@
_REQUESTS_TIMEOUT = 10 # For `requests` library operations


class ProfileInfo(TypedDict):
"""Return type for `get_profile_info()` function."""
class ProfileInfo(NamedTuple):
"""Returned by `get_profile_info()` function."""

club_name: str
total_members: int
Expand All @@ -33,20 +33,20 @@ def get_profile_info(club_id: str) -> ProfileInfo:
`ValueError` :
if information can't be located.
"""
profile_url = club_profile_url(club_id)
r = requests.get(profile_url, timeout=_REQUESTS_TIMEOUT)
url = profile_url(club_id)
r = requests.get(url, timeout=_REQUESTS_TIMEOUT)
r.raise_for_status()
if r.url != profile_url:
if r.url != url:
error_message = f"Redirected to unexpected URL {r.url}. Is `club_id` valid?"
raise ValueError(error_message)
profile_soup = BeautifulSoup(r.content, "html.parser")
return {
"club_name": _club_name_from_profile(profile_soup),
"total_members": _total_members_from_profile(profile_soup),
}
return ProfileInfo(
club_name=_club_name_from_profile(profile_soup),
total_members=_total_members_from_profile(profile_soup),
)


def club_profile_url(club_id: str) -> str:
def profile_url(club_id: str) -> str:
"""Return URL of club's profile page.

Parameters
Expand Down
23 changes: 16 additions & 7 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,47 @@

import pytest

from britishcycling_clubs import club_manager_url_via_login
from britishcycling_clubs import manager_url_via_login
from britishcycling_clubs.manager import _process_manager_member_counts


def test_club_manager_url_via_login__happy_path() -> None:
"""Test that correct URL is returned."""
# arrange
club_id = "000"
# act
url = manager_url_via_login(club_id)
# act
assert (
club_manager_url_via_login("000")
url
== "https://www.britishcycling.org.uk/uac/connect?success_url=/dashboard/club/membership?club_id=000/"
)


def test__process_manager_member_counts__happy_path() -> None:
"""Test that raw values are converted to ints."""
# arrange
raw_counts = {
"active": "123",
"new": "",
"expired": "67",
}
assert _process_manager_member_counts(raw_counts) == {
"active": 123,
"new": 0,
"expired": 67,
}
# act
counts = _process_manager_member_counts(raw_counts)
# assert
assert counts.active == 123
assert counts.new == 0
assert counts.expired == 67


def test__process_manager_member_counts__blank_active_count_raises_exception() -> None:
"""Test that ValueError is raised if active is blank."""
# arrange
raw_counts = {
"active": "",
"new": "8",
"expired": "67",
}
# act, assert
with pytest.raises(ValueError):
_process_manager_member_counts(raw_counts)
6 changes: 2 additions & 4 deletions tests/test_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from bs4 import BeautifulSoup

from britishcycling_clubs import club_profile_url
from britishcycling_clubs import profile_url
from britishcycling_clubs.profile import (
_club_name_from_profile,
_total_members_from_profile,
Expand All @@ -11,9 +11,7 @@

def test_club_profile_url__happy_path() -> None:
"""Test that correct URL is returned."""
assert (
club_profile_url("000") == "https://www.britishcycling.org.uk/club/profile/000/"
)
assert profile_url("000") == "https://www.britishcycling.org.uk/club/profile/000/"


# Partial extract from actual page
Expand Down