Skip to content
Permalink
Browse files
Add files via upload
  • Loading branch information
nafaya committed Nov 25, 2021
1 parent 63ec376 commit 9a4cb620c3dc189c11ca97f7cf43ab5cbca06e71
Showing 1 changed file with 67 additions and 0 deletions.
@@ -0,0 +1,67 @@
import numpy as np
import pandas as pd
import ast
# Load the two TMDB CSV exports; both paths are relative to the working directory.
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Join the credits onto the movies through their shared 'title' column.
# NOTE(review): distinct films that share a title would be cross-matched by
# this key -- confirm whether an id-based join is available and preferable.
movies = pd.merge(movies, credits, on='title')

# Keep only the columns the rest of the script works with.
movies = movies[['movie_id', 'title', 'genres', 'overview', 'cast']]

# Drop every row that has a missing value in any of the kept columns.
movies = movies.dropna()

# Sanity checks: remaining nulls per column and number of duplicated rows.
# The results are discarded when run as a script; they are only shown when
# these lines are evaluated in an interactive session or notebook.
movies.isnull().sum()
movies.duplicated().sum()

def convertGenres(obj):
    """Return the genre names held in a stringified list of dicts.

    Args:
        obj: A string containing a Python-literal list of dicts, each of
            which has a ``'name'`` key, e.g.
            ``'[{"id": 28, "name": "Action"}]'``.

    Returns:
        A list with the ``'name'`` value of every entry, in input order.
        An empty literal list yields an empty list.

    Raises:
        ValueError, SyntaxError: If ``obj`` is not a valid Python literal
            (raised by ``ast.literal_eval``).
        KeyError: If an entry has no ``'name'`` key.
    """
    # literal_eval parses the text without executing arbitrary code.
    return [entry['name'] for entry in ast.literal_eval(obj)]

# Replace each stringified genre list with a plain list of genre names.
movies["genres"] = movies["genres"].map(convertGenres)

def convertCast(obj, limit=3):
    """Return the names of the first ``limit`` entries of a stringified cast list.

    Args:
        obj: A string containing a Python-literal list of dicts, each of
            which has a ``'name'`` key.
        limit: Maximum number of names to return. Defaults to 3; ``None``
            returns every name.

    Returns:
        A list with at most ``limit`` names, in input order. Shorter cast
        lists are returned in full; an empty list yields an empty list.

    Raises:
        ValueError: If ``limit`` is negative, or if ``obj`` is not a valid
            Python literal (the latter raised by ``ast.literal_eval``).
        SyntaxError: If ``obj`` cannot be parsed as Python source.
        KeyError: If one of the first ``limit`` entries has no ``'name'`` key.
    """
    if limit is not None and limit < 0:
        # A negative slice bound would silently drop entries from the end.
        raise ValueError("limit must be non-negative or None")
    members = ast.literal_eval(obj)
    # Only the leading entries are inspected, so later entries may lack 'name'.
    return [member['name'] for member in members[:limit]]
# Replace each stringified cast list with the names convertCast extracts from it.
movies['cast'] = movies['cast'].map(convertCast)

# Split every overview string on whitespace, giving a list of words.
movies['overview'] = movies['overview'].map(lambda text: text.split())

# Strip the spaces inside each genre name so a multi-word genre is one token.
movies['genres'] = movies['genres'].map(
    lambda names: [name.replace(" ", "") for name in names]
)

# Movie_Info is the overview word list followed by the cast names.
# Genres are not merged in here; they stay in their own column.
movies['Movie_Info'] = movies['overview'] + movies['cast']

# Final frame: identifiers, genres and the combined Movie_Info column
# (the separate overview and cast columns are left out).
new_df = movies[['movie_id', 'title', 'genres', 'Movie_Info']]

# Console display settings: allow wide output and show up to 10 columns
# when a DataFrame is printed.
desired_width = 320
pd.options.display.width = desired_width
pd.options.display.max_columns = 10















0 comments on commit 9a4cb62

Please sign in to comment.