-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSana_css4p01.py
136 lines (93 loc) · 4.32 KB
/
Sana_css4p01.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 5 00:31:05 2024
@author: patel
"""
'Question 1'
import pandas as pd
# Load the movie dataset into a DataFrame.
# A raw string keeps every backslash of the Windows path literal. The previous
# literal mixed "\\" with a bare "\C", which is an invalid escape sequence
# (a warning today, documented to become an error in a future Python version).
# The resulting path value is unchanged.
# NOTE(review): this path is specific to one machine - confirm before running elsewhere.
movie_df = pd.read_csv(r"\Users\patel\CHPC\movie_dataset.csv")
# Keep the rows whose rating equals the maximum rating, then take the title of
# the first such row (file order decides when several movies tie).
highest_rated_movie = movie_df[movie_df['Rating'] == movie_df['Rating'].max()]['Title'].values[0]
print('Q1')
print(highest_rated_movie)
'Question 2'
# Mean of the revenue column across the whole dataset
average_revenue = movie_df.loc[:, 'Revenue (Millions)'].mean()
print('Q2', average_revenue, sep='\n')
'Question 3'
# Keep only the movies whose release year lies in 2015..2017 (both ends included)
filtered_movie_df = movie_df.loc[movie_df['Year'].between(2015, 2017)]
# Mean revenue of that subset
average_revenue_2015_to_2017 = filtered_movie_df.loc[:, 'Revenue (Millions)'].mean()
print('Q3', average_revenue_2015_to_2017, sep='\n')
'Question 4'
# Subset of the dataset released in 2016
movies_2016 = movie_df.loc[movie_df['Year'].eq(2016)]
# The row count of the subset is the number of 2016 releases
No_movies_2016 = movies_2016.shape[0]
print('Q4', No_movies_2016, sep='\n')
'Question 5'
# Number of rows whose Director is exactly 'Christopher Nolan'
Nolan_directed_movies = len(movie_df.loc[movie_df['Director'].eq('Christopher Nolan')])
print('Q5', Nolan_directed_movies, sep='\n')
'Question 6'
# Number of rows rated 8.0 or higher
No_high_rated_movies = len(movie_df.loc[movie_df['Rating'].ge(8.0)])
print('Q6', No_high_rated_movies, sep='\n')
'Question 7'
# Subset of the dataset directed by Christopher Nolan
Nolan_movies = movie_df.loc[movie_df['Director'].eq('Christopher Nolan')]
# Median of the Rating column within that subset
median_rating_Nolan_directed = Nolan_movies.loc[:, 'Rating'].median()
print('Q7', median_rating_Nolan_directed, sep='\n')
'Question 8'
# Mean rating per release year (one entry per distinct Year)
Average_rating_yearly = movie_df['Rating'].groupby(movie_df['Year']).mean()
# Index label (the year) at which that per-year mean is largest
year_with_highest_rating = Average_rating_yearly.idxmax()
print('Q8', year_with_highest_rating, sep='\n')
'Question 9'
# Movie counts for the two years being compared
No_movies_2006 = len(movie_df.loc[movie_df['Year'].eq(2006)])
No_movies_2016 = len(movie_df.loc[movie_df['Year'].eq(2016)])
# Relative growth from 2006 to 2016, expressed as a percentage
percentage_increase = (No_movies_2016 - No_movies_2006) / No_movies_2006 * 100
print('Q9', percentage_increase, sep='\n')
'Question 10'
from collections import Counter
# Build a flat list with one entry per actor credit.
# Splitting on ',' and stripping afterwards tolerates irregular spacing around
# the separator, so the same actor is not counted under two spellings.
# explode() yields one row per name instead of summing the per-row lists,
# which re-concatenated a growing list for every movie.
actor_names = movie_df['Actors'].str.split(',').explode().str.strip()
# Drop missing values and empty fragments so they are never counted as actors
all_actors = [name for name in actor_names.dropna() if name]
# Count the occurrences of each actor
actor_counts = Counter(all_actors)
# (name, count) pair of the most frequently credited actor
most_common_actor = actor_counts.most_common(1)[0]
print('Q10')
print(most_common_actor)
'Question 11'
# Split each Genre cell on commas and flatten to one genre per row.
# Stripping whitespace prevents ' Drama' and 'Drama' from counting as two
# genres; dropping missing values prevents NaN from being counted as a genre.
genre_names = movie_df['Genre'].str.split(',').explode().str.strip().dropna()
unique_genres = set(genre_names)
# An empty fragment (e.g. from a trailing comma) is not a genre
unique_genres.discard('')
# Count the number of unique genres
No_unique_genres = len(unique_genres)
print('Q11')
print(No_unique_genres)
'Question 12'
# Keep only the numeric columns of the dataset
numeric_columns = movie_df.select_dtypes(include=['number'])
# Pairwise Pearson correlation between the numeric columns
correlation_matrix = numeric_columns.corr(method='pearson')
#1. Positive correlation between Rating and Metascore - A higher rating is associated with a higher Metascore, which indicates the movie has received more positive reviews.
#2. Negative correlation between Rank and Votes - Lower-ranked movies (those with a larger Rank number) usually have fewer votes.
#3. No clear correlation between Year and the other features - There isn't a significant correlation between the year a movie was released and its Rank, Runtime, Rating, Votes or Metascore. This implies that the year of release alone doesn't dictate a movie's success or its quality.
#4. Moderate positive correlation between Revenue and votes.
#5. No strong correlation between Runtime and Revenue.
#ADVICE FOR DIRECTORS TO PRODUCE BETTER MOVIES:
#1. To raise ratings: Draw in more people, put an emphasis on excellent storytelling and production value.
#2. Always interact with the audience and take their preferences into account. Seek criticism, identify areas that need attention, and refine upcoming projects accordingly.
#3. Work with skilled cinematographers to create visually stunning and impactful scenes.
'Question 13'