-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
60 lines (47 loc) · 1.58 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import asyncpraw
from dotenv import load_dotenv
import os
# Load variables from a .env file (if any) into the process environment
# before the credentials are read below.
load_dotenv()

# Each credential is None when its environment variable is not set.
client_id = os.environ.get('CLIENT_ID')
client_secret = os.environ.get('CLIENT_SECRET')
user_agent = 'Scraper 1.0 by /u/Neither-Trick2134'

# Module-level Async PRAW client shared by the coroutines in this file.
reddit = asyncpraw.Reddit(
    user_agent=user_agent,
    client_secret=client_secret,
    client_id=client_id,
)
async def search_reddit(query, limit=10):
    """Search the 'all' subreddit for *query* and summarise each hit.

    Args:
        query: Search string forwarded to the subreddit search.
        limit: Value forwarded as the search's ``limit`` argument.

    Returns:
        A list with one dict per submission yielded by the search, in the
        order yielded. Each dict has the keys ``title``, ``id``, ``url``,
        ``comments`` (the submission's ``num_comments``) and ``body``
        (the submission's ``selftext``).
    """
    everything = await reddit.subreddit('all')
    return [
        {
            'title': hit.title,
            'id': hit.id,
            'url': hit.url,
            'comments': hit.num_comments,
            'body': hit.selftext,
        }
        async for hit in everything.search(query, limit=limit)
    ]
async def extract_comments(submission_id: str) -> list:
    """Collect the body and score of every comment on a submission.

    The comment tree is walked depth-first in pre-order: each comment is
    recorded before its replies, and siblings keep the order in which the
    forest yields them.

    Args:
        submission_id: ID of the submission to load through the shared
            ``reddit`` client.

    Returns:
        A list of dicts, one per comment, with the keys ``comment_body``
        and ``comment_score``.
    """
    submission = await reddit.submission(id=submission_id)
    # Resolve the "more comments" placeholders up front (no limit) so the
    # walk below reads .body / .score on every node it visits.
    await submission.comments.replace_more(limit=None)

    comments = []
    # An explicit stack replaces the former recursive helper: no Python
    # frame is consumed per reply level, so very deep threads cannot raise
    # RecursionError. Items are pushed reversed so pop() returns them in
    # their original order.
    pending = list(submission.comments)
    pending.reverse()
    while pending:
        comment = pending.pop()
        comments.append({
            'comment_body': comment.body,
            'comment_score': comment.score,
        })
        # Replies go on top of the stack so they are visited before the
        # comment's remaining siblings (pre-order).
        pending.extend(reversed(list(comment.replies)))
    return comments
async def search_reddit_comments(query: str, limit: int = 10) -> str:
    """Return the text of all comments on the posts matching *query*.

    Posts are found with ``search_reddit`` and their comments are fetched
    one post at a time with ``extract_comments``.

    Args:
        query: Search string forwarded to ``search_reddit``.
        limit: Forwarded to ``search_reddit`` as its ``limit``. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        Every comment body concatenated in traversal order, or an empty
        string when there are no comments.
    """
    posts = await search_reddit(query=query, limit=limit)

    bodies = []
    for post in posts:
        comments = await extract_comments(post['id'])
        bodies.extend(item['comment_body'] for item in comments)

    # NOTE(review): bodies are joined with no separator, matching the
    # original behaviour -- confirm that adjacent comments running together
    # is intended.
    return "".join(bodies)