1
1
import bs4
2
2
import requests
3
- import csv
3
+ import json
4
4
from utils .base_plugin import ListScraper
5
5
6
6
class IMDBList (ListScraper ):
@@ -13,9 +13,26 @@ def get_list(list_id, config=None):
13
13
list_name = soup .find ('h1' ).text
14
14
description = soup .find ("div" , {"class" : "list-description" }).text
15
15
16
- r = requests . get ( f'https://www.imdb.com/list/ { list_id } /export' , headers = { 'Accept-Language' : 'en-US' , 'User-Agent' : 'Mozilla/5.0' })
17
- reader = csv . DictReader ( r . text . splitlines () )
16
+ ld_json = soup . find ( "script" , { "type" : "application/ld+json" }). text
17
+ ld_json = json . loads ( ld_json )
18
18
movies = []
19
- for row in reader :
20
- movies .append ({'title' : row ['Title' ], 'release_year' : row ['Year' ], "media_type" : row ['Title Type' ], "imdb_id" : row ['Const' ]})
19
+ for row in ld_json ["itemListElement" ]:
20
+ url_parts = row ["item" ]["url" ].split ("/" )
21
+ url_parts = [p for p in url_parts if p != "" ]
22
+
23
+ release_year = None
24
+ if config .get ("add_release_year" , False ):
25
+ # Get release_date
26
+ r = requests .get (row ["item" ]["url" ], headers = {'Accept-Language' : 'en-US' , 'User-Agent' : 'Mozilla/5.0' , 'Accept-Language' : 'en-US' })
27
+ soup = bs4 .BeautifulSoup (r .text , 'html.parser' )
28
+ movie_json = soup .find ("script" , {"type" : "application/ld+json" }).text
29
+ release_year = json .loads (movie_json )["datePublished" ].split ("-" )[0 ]
30
+
31
+ movies .append ({
32
+ "title" : row ["item" ]["name" ],
33
+ "release_year" : release_year ,
34
+ "media_type" : row ["item" ]["@type" ],
35
+ "imdb_id" : url_parts [- 1 ]
36
+ })
37
+
21
38
return {'name' : list_name , 'items' : movies , "description" : description }
0 commit comments