Skip to content

Commit a6c073b

Browse files
authored
Merge pull request #42 from samueljim/master
Load the non-detailed Letterboxd list page so that watchlists can also be loaded.
2 parents 37f7eec + 56e47dd commit a6c073b

File tree

1 file changed

+41
-12
lines changed

1 file changed

+41
-12
lines changed

plugins/letterboxd.py

+41-12
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from utils.base_plugin import ListScraper
33
import bs4
44
import requests
5-
import time
65
from loguru import logger
76

87
class Letterboxd(ListScraper):
@@ -16,7 +15,16 @@ def get_list(list_id, config=None):
1615
movies = []
1716

1817
while True:
19-
r = requests.get(f"https://letterboxd.com/{list_id}/detail/by/release-earliest/page/{page_number}/", headers={'User-Agent': 'Mozilla/5.0'})
18+
print("Page number: ", page_number)
19+
watchlist = list_id.endswith("/watchlist")
20+
21+
if watchlist:
22+
r = requests.get(f"https://letterboxd.com/{list_id}/by/release-earliest/page/{page_number}/", headers={'User-Agent': 'Mozilla/5.0'})
23+
24+
list_name = list_id.split("/")[0] + " Watchlist"
25+
description = "Watchlist for " + list_id.split("/")[0]
26+
else:
27+
r = requests.get(f"https://letterboxd.com/{list_id}/detail/by/release-earliest/page/{page_number}/", headers={'User-Agent': 'Mozilla/5.0'})
2028

2129
soup = bs4.BeautifulSoup(r.text, 'html.parser')
2230

@@ -30,22 +38,43 @@ def get_list(list_id, config=None):
3038
else:
3139
description = ""
3240

33-
for movie_soup in soup.find_all('div', {'class': 'film-detail-content'}):
34-
movie_name = movie_soup.find('h2', {'class': 'headline-2 prettify'}).find('a').text
35-
movie_year = movie_soup.find('small', {'class': 'metadata'})
36-
if movie_year is not None:
37-
movie_year = movie_year.text
38-
movie = {"title": movie_name, "release_year": movie_year, "media_type": "movie"}
41+
if watchlist:
42+
page = soup.find_all('li', {'class': 'poster-container'})
43+
else:
44+
page = soup.find_all('div', {'class': 'film-detail-content'})
3945

40-
# Find the imdb id
41-
if config.get("imdb_id_filter", False):
42-
r = requests.get(f"https://letterboxd.com{movie_soup.find('a')['href']}", headers={'User-Agent': 'Mozilla/5.0'})
46+
for movie_soup in page:
47+
if watchlist:
48+
movie = {"title": movie_soup.find('img').attrs['alt'], "media_type": "movie"}
49+
link = movie_soup.find('div', {'class': 'film-poster'})['data-target-link']
50+
else:
51+
movie = {"title": movie_soup.find('h2', {'class': 'headline-2 prettify'}).find('a').text, "media_type": "movie"}
52+
movie_year = movie_soup.find('small', {'class': 'metadata'})
53+
if movie_year is not None:
54+
movie["release_year"] = movie_year.text
55+
56+
link = movie_soup.find('a')['href']
57+
58+
59+
if config.get("imdb_id_filter", False) or 'release_year' not in movie:
60+
logger.info(f"Getting release year and imdb details for: {movie['title']}")
61+
62+
# Find the imdb id and release year
63+
r = requests.get(f"https://letterboxd.com{link}", headers={'User-Agent': 'Mozilla/5.0'})
4364
movie_soup = bs4.BeautifulSoup(r.text, 'html.parser')
65+
4466
imdb_id = movie_soup.find("a", {"data-track-action":"IMDb"})
67+
movie_year = movie_soup.find("div", {"class": "releaseyear"})
68+
4569
if imdb_id is not None:
4670
movie["imdb_id"] = imdb_id["href"].split("/title/")[1].split("/")[0]
4771

48-
movies.append(movie)
72+
if movie_year is not None:
73+
movie["release_year"] = movie_year.text
74+
75+
# If a movie doesn't have a year, it has only just been announced and its release date is not yet known. We can safely skip these, because a movie will have a release year by the time it comes out.
76+
if 'release_year' in movie:
77+
movies.append(movie)
4978

5079
if soup.find('a', {'class': 'next'}):
5180
page_number += 1

0 commit comments

Comments
 (0)