2
2
from utils .base_plugin import ListScraper
3
3
import bs4
4
4
import requests
5
+ import time
6
+ from loguru import logger
5
7
6
8
class Letterboxd (ListScraper ):
7
9
@@ -14,7 +16,9 @@ def get_list(list_id, config=None):
14
16
movies = []
15
17
16
18
while True :
19
+ print ("Page number: " , page_number )
17
20
r = requests .get (f"https://letterboxd.com/{ list_id } /detail/by/release-earliest/page/{ page_number } /" , headers = {'User-Agent' : 'Mozilla/5.0' })
21
+
18
22
soup = bs4 .BeautifulSoup (r .text , 'html.parser' )
19
23
20
24
if list_name is None :
@@ -26,15 +30,18 @@ def get_list(list_id, config=None):
26
30
27
31
for movie_soup in soup .find_all ('div' , {'class' : 'film-detail-content' }):
28
32
movie_name = movie_soup .find ('h2' , {'class' : 'headline-2 prettify' }).find ('a' ).text
29
- movie_year = movie_soup .find ('small' , {'class' : 'metadata' }).text
33
+ movie_year = movie_soup .find ('small' , {'class' : 'metadata' })
34
+ if movie_year is not None :
35
+ movie_year = movie_year .text
30
36
movie = {"title" : movie_name , "release_year" : movie_year , "media_type" : "movie" }
31
37
32
38
# Look up the IMDb ID (only when the "imdb_id_filter" config option is set)
33
39
if config .get ("imdb_id_filter" , False ):
34
40
r = requests .get (f"https://letterboxd.com{ movie_soup .find ('a' )['href' ]} " , headers = {'User-Agent' : 'Mozilla/5.0' })
35
41
movie_soup = bs4 .BeautifulSoup (r .text , 'html.parser' )
36
- imdb_id = movie_soup .find ("a" , {"data-track-action" :"IMDb" })["href" ].split ("/title/" )[1 ].split ("/" )[0 ]
37
- movie ["imdb_id" ] = imdb_id
42
+ imdb_id = movie_soup .find ("a" , {"data-track-action" :"IMDb" })
43
+ if imdb_id is not None :
44
+ movie ["imdb_id" ] = imdb_id ["href" ].split ("/title/" )[1 ].split ("/" )[0 ]
38
45
39
46
movies .append (movie )
40
47
0 commit comments