Skip to content

Commit a3e36e6

Browse files
committed
Made letterboxd more robust #38
1 parent a2287c3 commit a3e36e6

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

plugins/letterboxd.py

+10-3
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
from utils.base_plugin import ListScraper
33
import bs4
44
import requests
5+
import time
6+
from loguru import logger
57

68
class Letterboxd(ListScraper):
79

@@ -14,7 +16,9 @@ def get_list(list_id, config=None):
1416
movies = []
1517

1618
while True:
19+
print("Page number: ", page_number)
1720
r = requests.get(f"https://letterboxd.com/{list_id}/detail/by/release-earliest/page/{page_number}/", headers={'User-Agent': 'Mozilla/5.0'})
21+
1822
soup = bs4.BeautifulSoup(r.text, 'html.parser')
1923

2024
if list_name is None:
@@ -26,15 +30,18 @@ def get_list(list_id, config=None):
2630

2731
for movie_soup in soup.find_all('div', {'class': 'film-detail-content'}):
2832
movie_name = movie_soup.find('h2', {'class': 'headline-2 prettify'}).find('a').text
29-
movie_year = movie_soup.find('small', {'class': 'metadata'}).text
33+
movie_year = movie_soup.find('small', {'class': 'metadata'})
34+
if movie_year is not None:
35+
movie_year = movie_year.text
3036
movie = {"title": movie_name, "release_year": movie_year, "media_type": "movie"}
3137

3238
# Find the imdb id
3339
if config.get("imdb_id_filter", False):
3440
r = requests.get(f"https://letterboxd.com{movie_soup.find('a')['href']}", headers={'User-Agent': 'Mozilla/5.0'})
3541
movie_soup = bs4.BeautifulSoup(r.text, 'html.parser')
36-
imdb_id = movie_soup.find("a", {"data-track-action":"IMDb"})["href"].split("/title/")[1].split("/")[0]
37-
movie["imdb_id"] = imdb_id
42+
imdb_id = movie_soup.find("a", {"data-track-action":"IMDb"})
43+
if imdb_id is not None:
44+
movie["imdb_id"] = imdb_id["href"].split("/title/")[1].split("/")[0]
3845

3946
movies.append(movie)
4047

0 commit comments

Comments
 (0)