-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze.py
205 lines (172 loc) · 7.9 KB
/
analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import os
import json
import requests
import argparse
import pandas as pd
import matplotlib.pyplot as plt
from config import API_KEY, MAX_RESULTS, YOUTUBE_API_URL
# Function to fetch channel information
def get_channel_info(channel_id):
    """Fetch the snippet and statistics of a YouTube channel.

    Args:
        channel_id: The YouTube channel ID to look up.

    Returns:
        The decoded JSON response (a dict whose ``items`` list is non-empty),
        or ``None`` when the request fails, the status is not 200, the body
        is not valid JSON, or the response carries no items. Every failure
        is reported on stdout.
    """
    # Let requests build the query string so every value is URL-encoded.
    query = {
        'part': 'snippet,statistics',
        'id': channel_id,
        'key': API_KEY,
    }
    try:
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(f"{YOUTUBE_API_URL}channels", params=query, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching channel info: {exc}")
        return None

    # Check if the response status is OK
    if response.status_code != 200:
        print(f"Error fetching channel info: {response.status_code}")
        return None

    try:
        channel_data = response.json()
    except ValueError:
        print("Error fetching channel info: response body is not valid JSON")
        return None

    # A missing *or empty* 'items' list leaves nothing for the caller to
    # index into; print the entire response to help debugging.
    if not channel_data.get('items'):
        print(f"Error: No 'items' found in the response. Full response: {json.dumps(channel_data, indent=2)}")
        return None
    return channel_data
# Function to fetch video details from a channel
def get_video_details(channel_id):
    """Fetch search results (snippets) for the items of a channel.

    Args:
        channel_id: The YouTube channel ID whose content is listed.

    Returns:
        The ``items`` list of the search response (an empty list when the
        key is absent), or ``None`` when the request fails, the status is
        not 200, or the body is not valid JSON. Failures are reported on
        stdout.
    """
    # Let requests build the query string so every value is URL-encoded.
    query = {
        'part': 'snippet',
        'channelId': channel_id,
        'maxResults': MAX_RESULTS,
        'key': API_KEY,
    }
    try:
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(f"{YOUTUBE_API_URL}search", params=query, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching video details: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching video details: {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        print("Error fetching video details: response body is not valid JSON")
        return None

    # .get() avoids a KeyError when the response has no 'items' key.
    return payload.get('items', [])
# Function to fetch video metrics (views, likes, etc.)
def get_video_metrics(video_id):
    """Fetch the statistics (views, likes, etc.) of a single video.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        The ``statistics`` dict of the first item in the response, or
        ``None`` when the request fails, the status is not 200, the body is
        not valid JSON, or the response contains no item / no statistics.
        Failures are reported on stdout.
    """
    # Let requests build the query string so every value is URL-encoded.
    query = {
        'part': 'statistics',
        'id': video_id,
        'key': API_KEY,
    }
    try:
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(f"{YOUTUBE_API_URL}videos", params=query, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching video metrics: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching video metrics: {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        print("Error fetching video metrics: response body is not valid JSON")
        return None

    # An empty 'items' list would make items[0] raise IndexError, so report
    # it and return None like every other failure path.
    items = payload.get('items') or []
    if not items:
        print(f"Error fetching video metrics: no data returned for video {video_id}")
        return None
    return items[0].get('statistics')
# Function to generate output in Markdown format
def generate_markdown(channel_info, video_data):
    """Render the channel report as a Markdown string.

    When ``video_data`` is non-empty, four charts are also drawn via the
    module's ``plot_*`` functions and saved as PNG files in the current
    working directory; the report embeds them by relative file name.

    Args:
        channel_info: Dict with the keys ``channel_name``, ``subscribers``,
            ``total_views`` and ``total_videos``.
        video_data: List of dicts with the keys ``title``, ``url``,
            ``views``, ``likes`` and ``comments``. May be empty.

    Returns:
        The complete Markdown document as a single string.
    """
    # Collect fragments and join once instead of repeated string +=.
    parts = [
        # Begin markdown output
        f"# YouTube Channel Analysis - {channel_info['channel_name']}\n\n",
        # Channel Information
        "## Channel Information\n",
        f"- **Channel Name**: {channel_info['channel_name']}\n",
        f"- **Subscribers**: {channel_info['subscribers']}\n",
        f"- **Total Views**: {channel_info['total_views']}\n",
        f"- **Total Videos**: {channel_info['total_videos']}\n\n",
        # Video Details
        "## Video Details\n\n",
    ]

    if not video_data:
        parts.append("No valid videos found.\n\n")
        return "".join(parts)

    parts.append("| Video Title | Video URL | Views | Likes | Comments |\n")
    parts.append("|---------------------------|----------------------------------------|---------|---------|----------|\n")
    for video in video_data:
        # A raw '|' in a title would be read as a column separator and
        # break the table row, so escape it.
        title = str(video['title']).replace('|', '\\|')
        parts.append(
            f"| {title} | {video['url']} | {video['views']} | {video['likes']} | {video['comments']} |\n"
        )

    # Generate various charts: (plot function, output file, image alt text).
    charts = (
        (plot_video_performance, 'video_performance.png', 'Video Performance'),
        (plot_likes_vs_views, 'likes_vs_views.png', 'Likes vs Views'),
        (plot_comments, 'comments_chart.png', 'Comments'),
        (plot_likes_to_views_ratio, 'likes_to_views_ratio.png', 'Likes to Views Ratio'),
    )
    parts.append("### Video Analytics Charts\n")
    for draw_chart, filename, alt_text in charts:
        draw_chart(video_data)
        plt.savefig(filename)
        # Release the figure once it is on disk so figures do not pile up.
        plt.close()
        parts.append(f"![{alt_text}]({filename})\n")

    return "".join(parts)
# Function to plot video performance (top 10 videos by view count)
def plot_video_performance(video_data):
    """Draw a horizontal bar chart of the ten most-viewed videos.

    A new 10x6 pyplot figure is created and left as the current figure;
    nothing is saved or shown here.

    Args:
        video_data: List of dicts providing at least ``title`` and ``views``.
    """
    frame = pd.DataFrame(video_data)
    ranked = frame.sort_values(by="views", ascending=False)
    top_ten = ranked.head(10)

    plt.figure(figsize=(10, 6))
    plt.barh(top_ten['title'], top_ten['views'], color='skyblue')
    plt.xlabel('Views')
    plt.title('Top 10 Videos by Views')
# Function to plot Likes vs Views
def plot_likes_vs_views(video_data):
    """Draw a likes-against-views scatter plot for the ten most-viewed videos.

    A new 10x6 pyplot figure is created and left as the current figure;
    nothing is saved or shown here.

    Args:
        video_data: List of dicts providing at least ``views`` and ``likes``.
    """
    frame = pd.DataFrame(video_data)
    most_viewed = frame.sort_values(by="views", ascending=False).head(10)
    view_counts = most_viewed['views']
    like_counts = most_viewed['likes']

    plt.figure(figsize=(10, 6))
    plt.scatter(view_counts, like_counts, color='green')
    plt.xlabel('Views')
    plt.ylabel('Likes')
    plt.title('Likes vs Views for Top 10 Videos')
# Function to plot comments distribution
def plot_comments(video_data):
    """Draw a horizontal bar chart of the ten most-commented videos.

    A new 10x6 pyplot figure is created and left as the current figure;
    nothing is saved or shown here.

    Args:
        video_data: List of dicts providing at least ``title`` and
            ``comments``.
    """
    df = pd.DataFrame(video_data)
    # Rank by comment count so the selection matches the chart title
    # ('Top 10 Videos by Comments'); it was previously ranked by views.
    df = df.sort_values(by="comments", ascending=False).head(10)
    plt.figure(figsize=(10, 6))
    plt.barh(df['title'], df['comments'], color='orange')
    plt.xlabel('Comments')
    plt.title('Top 10 Videos by Comments')
# Function to plot Likes to Views Ratio
def plot_likes_to_views_ratio(video_data):
    """Draw a horizontal bar chart of the ten best likes-to-views ratios.

    A new 10x6 pyplot figure is created and left as the current figure;
    nothing is saved or shown here.

    Args:
        video_data: List of dicts providing at least ``title``, ``likes``
            and ``views``.
    """
    df = pd.DataFrame(video_data)
    # Mask zero-view videos before dividing: likes / 0 would yield inf (or
    # NaN for 0 / 0), which sorts to the top and breaks the bar chart.
    # Such videos get a ratio of 0 instead.
    positive_views = df['views'].where(df['views'] > 0)
    df['Likes to Views Ratio'] = (df['likes'] / positive_views).fillna(0.0)
    df = df.sort_values(by="Likes to Views Ratio", ascending=False).head(10)
    plt.figure(figsize=(10, 6))
    plt.barh(df['title'], df['Likes to Views Ratio'], color='purple')
    plt.xlabel('Likes to Views Ratio')
    plt.title('Likes to Views Ratio of Top 10 Videos')
# Function to analyze channel and videos
def _collect_video_data(videos):
    """Build one metrics record per video found in a list of search items."""
    video_data = []
    for video in videos:
        # Search results can include non-video items, which carry no 'videoId'.
        if 'id' not in video or 'videoId' not in video['id']:
            print(f"Skipping non-video item: {video['snippet']['title']}")
            continue

        video_id = video['id']['videoId']
        metrics = get_video_metrics(video_id)
        if not metrics:
            continue

        video_data.append({
            'title': video['snippet']['title'],
            'url': f"https://www.youtube.com/watch?v={video_id}",
            # Individual counters may be missing from the statistics;
            # treat those as 0.
            'views': int(metrics.get('viewCount', 0)),
            'likes': int(metrics.get('likeCount', 0)),
            'comments': int(metrics.get('commentCount', 0)),
        })
    return video_data


def analyze_channel(channel_id):
    """Analyze a channel and its videos and write the report to ``output.md``.

    Prints progress and the channel summary to stdout, fetches the channel's
    videos and their metrics, renders the Markdown report (which also saves
    the chart images) and writes it to ``output.md`` in the current working
    directory. Returns early, after printing a message, when the channel or
    its videos cannot be fetched.

    Args:
        channel_id: The YouTube channel ID to analyze.

    Returns:
        None.
    """
    print("Fetching channel information...")
    channel_data = get_channel_info(channel_id)
    # Also guard against an empty 'items' list, which would make [0] raise.
    if not channel_data or not channel_data.get('items'):
        print("Error: Could not fetch channel info.")
        return

    channel_info = channel_data['items'][0]
    statistics = channel_info.get('statistics', {})
    channel_name = channel_info['snippet']['title']
    # A statistics field may be omitted from the response (presumably e.g.
    # a hidden subscriber count - verify against the API docs); fall back
    # to a placeholder instead of raising KeyError.
    subscriber_count = statistics.get('subscriberCount', 'N/A')
    total_views = statistics.get('viewCount', 'N/A')
    total_videos = statistics.get('videoCount', 'N/A')
    channel_info_dict = {
        'channel_name': channel_name,
        'subscribers': subscriber_count,
        'total_views': total_views,
        'total_videos': total_videos,
    }
    print(f"Channel: {channel_name}")
    print(f"Subscribers: {subscriber_count}")
    print(f"Total Views: {total_views}")
    print(f"Total Videos: {total_videos}")

    print("\nFetching video details...")
    videos = get_video_details(channel_id)
    if not videos:
        print("No videos found for this channel.")
        return

    video_data = _collect_video_data(videos)

    # Generate the markdown output
    markdown_output = generate_markdown(channel_info_dict, video_data)

    # Write the output to the file. Video titles routinely contain non-ASCII
    # characters, so pin the encoding rather than rely on the platform default.
    with open("output.md", "w", encoding="utf-8") as f:
        f.write(markdown_output)
    print("Analysis saved to output.md.")
if __name__ == "__main__":
    # Command-line entry point: a single required --channel_id option.
    cli = argparse.ArgumentParser(description="Analyze YouTube Channel content")
    cli.add_argument(
        '--channel_id',
        type=str,
        required=True,
        help="The YouTube Channel ID",
    )
    options = cli.parse_args()

    # Run the analysis for the channel given on the command line.
    analyze_channel(options.channel_id)