-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop_10_movie_analyzer.py
135 lines (115 loc) · 4.49 KB
/
top_10_movie_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the dataset from the CSV at `file_path` (decoded as ISO-8859-1),
# then keep only the ten rows with the largest `popularity` values.
file_path = '/content/imdb.csv'
data = pd.read_csv(filepath_or_buffer=file_path, encoding='ISO-8859-1')
top_10_data = data.nlargest(n=10, columns='popularity')
def plot_correlation_matrix():
    """Display an annotated heatmap of correlations between the numeric columns of `top_10_data`."""
    numeric_corr = top_10_data.corr(numeric_only=True)
    plt.figure(figsize=(12, 8))
    # Cell values are printed with two decimals on a diverging palette.
    heatmap_options = {
        'annot': True,
        'cmap': 'coolwarm',
        'fmt': ".2f",
        'linewidths': 0.5,
    }
    sns.heatmap(numeric_corr, **heatmap_options)
    plt.title('Correlation Matrix (Top 10 Movies)')
    plt.show()
def plot_genres_distribution():
    """Display a horizontal bar chart of how many rows of `top_10_data` share each `genres` value."""
    plt.figure(figsize=(14, 6))
    # Bars follow the order produced by value_counts() on the genres column.
    genre_order = top_10_data['genres'].value_counts().index
    axes = sns.countplot(y='genres', data=top_10_data, order=genre_order)
    axes.set_title('Distribution of Genres (Top 10 Movies)')
    axes.set_xlabel('Number of Movies')
    axes.set_ylabel('Genres')
    plt.show()
def plot_release_year_distribution():
    """Display a bar chart of how many rows of `top_10_data` fall in each `release_year`."""
    plt.figure(figsize=(14, 6))
    # Bars follow the order produced by value_counts() on the release_year column.
    year_order = top_10_data['release_year'].value_counts().index
    axes = sns.countplot(x='release_year', data=top_10_data, order=year_order)
    axes.set_title('Number of Movies Released per Year (Top 10 Movies)')
    axes.set_xlabel('Release Year')
    axes.set_ylabel('Number of Movies')
    # Tilt the year tick labels by 45 degrees.
    plt.xticks(rotation=45)
    plt.show()
def plot_budget_vs_revenue():
    """Display a scatter plot of `revenue` against `budget` for the rows of `top_10_data`."""
    plt.figure(figsize=(10, 6))
    axes = sns.scatterplot(data=top_10_data, x='budget', y='revenue')
    axes.set_title('Budget vs. Revenue (Top 10 Movies)')
    axes.set_xlabel('Budget')
    axes.set_ylabel('Revenue')
    plt.show()
def plot_vote_avg_vs_popularity():
    """Display a scatter plot of `popularity` against `vote_average` for the rows of `top_10_data`."""
    plt.figure(figsize=(10, 6))
    axes = sns.scatterplot(data=top_10_data, x='vote_average', y='popularity')
    axes.set_title('Vote Average vs. Popularity (Top 10 Movies)')
    axes.set_xlabel('Vote Average')
    axes.set_ylabel('Popularity')
    plt.show()
def plot_budget_distribution():
    """Display a 10-bin histogram of the `budget` column of `top_10_data` with a KDE curve overlaid."""
    plt.figure(figsize=(10, 6))
    budgets = top_10_data['budget']
    axes = sns.histplot(budgets, bins=10, kde=True)
    axes.set_title('Distribution of Budget (Top 10 Movies)')
    axes.set_xlabel('Budget')
    axes.set_ylabel('Frequency')
    plt.show()
def plot_runtime_by_genre():
    """Display horizontal box plots of `runtime`, one per `genres` value in `top_10_data`."""
    plt.figure(figsize=(14, 8))
    # Boxes follow the order produced by value_counts() on the genres column.
    genre_order = top_10_data['genres'].value_counts().index
    axes = sns.boxplot(x='runtime', y='genres', data=top_10_data, order=genre_order)
    axes.set_title('Runtime by Genre (Top 10 Movies)')
    axes.set_xlabel('Runtime (minutes)')
    axes.set_ylabel('Genres')
    plt.show()
def plot_pairplot():
    """Display a seaborn pair plot over four numeric columns of `top_10_data`."""
    pair_columns = ['budget', 'revenue', 'popularity', 'vote_average']
    subset = top_10_data[pair_columns]
    sns.pairplot(subset)
    # y=1.02 places the figure title slightly above the top of the grid.
    plt.suptitle(
        'Pairplot of Budget, Revenue, Popularity, and Vote Average (Top 10 Movies)',
        y=1.02,
    )
    plt.show()
def plot_genre_pie_chart():
    """Display a pie chart of the share of `top_10_data` rows held by each `genres` value."""
    counts_by_genre = top_10_data['genres'].value_counts()
    plt.figure(figsize=(10, 10))
    # Each wedge is labelled with its genres value and its percentage to one decimal.
    pie_options = {
        'labels': counts_by_genre.index,
        'autopct': '%1.1f%%',
        'startangle': 140,
    }
    plt.pie(counts_by_genre, **pie_options)
    plt.title('Proportion of Movies by Genre (Top 10 Movies)')
    plt.show()
def show_summary_statistics():
    """Print the `describe()` summary table of `top_10_data` to standard output."""
    summary = top_10_data.describe()
    print(summary)
# Menu to choose analysis for top 10 movies
def analysis_menu():
    """Run an interactive console menu for the top-10 movie analyses.

    Each pass prints the numbered options, reads a choice from standard
    input and runs the matching analysis function. An unrecognised choice
    prints an error message instead. After every pass except "Exit" the
    user is asked whether to continue; the loop ends when option 11 is
    chosen or when the follow-up answer is anything other than "yes" or
    "y" (case-insensitive, surrounding whitespace ignored).
    """
    # Menu number -> analysis function. Built inside the function so the
    # names are resolved at call time, after every analysis is defined.
    actions = {
        '1': plot_correlation_matrix,
        '2': plot_genres_distribution,
        '3': plot_release_year_distribution,
        '4': plot_budget_vs_revenue,
        '5': plot_vote_avg_vs_popularity,
        '6': plot_budget_distribution,
        '7': plot_runtime_by_genre,
        '8': plot_pairplot,
        '9': plot_genre_pie_chart,
        '10': show_summary_statistics,
    }

    while True:
        print("\nSelect the analysis you want to perform:")
        print("1. Correlation Matrix")
        print("2. Genres Distribution (Bar Chart)")
        print("3. Movies Released per Year (Bar Chart)")
        print("4. Budget vs Revenue (Scatter Plot)")
        print("5. Vote Average vs Popularity (Scatter Plot)")
        print("6. Budget Distribution (Histogram)")
        print("7. Runtime by Genre (Boxplot)")
        print("8. Pairplot of Key Numerical Variables")
        print("9. Proportion of Movies by Genre (Pie Chart)")
        print("10. Summary Statistics")
        print("11. Exit")

        # Strip surrounding whitespace so an answer like " 3 " is accepted,
        # consistent with how the yes/no answer below is normalised.
        choice = input("Enter your choice (1-11): ").strip()

        if choice == '11':
            print("Exiting the menu.")
            break

        action = actions.get(choice)
        if action is None:
            print("Invalid choice. Please select a number between 1 and 11.")
        else:
            action()

        print("\n\n\n")
        another = input("Do you want to perform another analysis? (yes/no): ").strip().lower()
        # Accept the common short form "y" as well as "yes".
        if another not in ('yes', 'y'):
            print("Session ended.")
            break
# Start the interactive menu. This call sits at module level with no
# __main__ guard, so it runs whenever the file is executed or imported.
analysis_menu()