Add files via upload

nafaya · Nov 25, 2021 · 9a4cb620c3dc189c11ca97f7cf43ab5cbca06e71 · 9a4cb62
1 parent 63ec376
commit 9a4cb620c3dc189c11ca97f7cf43ab5cbca06e71
Showing 1 changed file with 67 additions and 0 deletions.
diff --git a/Movie_Data.py b/Movie_Data.py
@@ -0,0 +1,67 @@
+import numpy as np
+import pandas as pd
+import ast
+movies = pd.read_csv('tmdb_5000_movies.csv')  # load the movies CSV; path is relative to the working directory
+credits = pd.read_csv('tmdb_5000_credits.csv')  # load the credits CSV; path is relative to the working directory
+
+# Merge movies and credits on their shared 'title' column
+movies = movies.merge(credits,on='title')  # NOTE(review): rows multiply if a title repeats in both frames - confirm titles are unique
+
+# Keep only the columns used below:
+# movie_id, title, genres, overview and cast
+movies = movies[['movie_id','title','genres','overview','cast']]
+
+
+movies.dropna(inplace=True) # drop rows that have a missing value in any of the kept columns
+(movies.isnull().sum()) # per-column null counts; the expression result is neither stored nor printed
+(movies.duplicated().sum()) # count of duplicated rows; the expression result is neither stored nor printed
+
+def convertGenres (obj): # Parse obj (a string holding a Python-literal sequence of dicts) and return the 'name' value of every entry as a list
+    L = []
+    for i in ast.literal_eval(obj):  # literal_eval only evaluates literals, so nothing inside obj is executed as code
+        L.append(i['name'])
+    return L
+
+movies["genres"] = movies['genres'].apply(convertGenres)  # replace each genres cell with its list of genre names
+
+def convertCast (obj): # Parse obj (a string holding a Python-literal sequence of dicts) and return the 'name' of at most the first 3 entries
+    l=[]
+    counter = 0  # number of names collected so far
+    for i in ast.literal_eval(obj):
+        if counter !=3:
+           l.append(i['name'])
+           counter = counter + 1
+        else:
+            break  # three names collected; the remaining entries are ignored
+    return l
+movies['cast'] = (movies['cast'].apply(convertCast))  # replace each cast cell with a list of at most three names
+
+# Turn each overview from a string into a list of whitespace-separated words
+movies['overview'] = movies['overview'].apply(lambda x: x.split())
+
+movies['genres'] = movies["genres"].apply(lambda x:[i.replace(" ","") for i in x])  # remove spaces inside each genre name
+
+# Concatenate the overview words and the cast names into one list column (genres are not included)
+movies['Movie_Info'] = movies['overview'] + movies['cast']
+
+# Create a new data frame without the separate overview and cast columns; genres is kept next to Movie_Info
+new_df = movies[['movie_id','title','genres','Movie_Info']]
+
+desired_width=320
+pd.set_option('display.width', desired_width)  # set the pandas display width option to 320
+pd.set_option('display.max_columns',10)  # set the pandas max displayed columns option to 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+