# NOTE: the lines below are web-page navigation text that was captured along
# with the script; they are not part of the program.
# Skip to content
# Permalink
# master
# Switch branches/tags
#
# Name already in use
#
# A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
# Go to file
#
#
# Cannot retrieve contributors at this time
import numpy as np
import pandas as pd
import ast
# Read the two CSV files; the relative paths are resolved against the current
# working directory.
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Join the credits onto the movie rows through the shared 'title' column.
# NOTE(review): if a title occurs more than once in either file, this join
# produces extra cross-matched rows -- confirm against the data.
movies = pd.merge(movies, credits, on='title')

# Keep only the columns used further down, then discard every ROW that has a
# missing value in any of them (dropna removes rows here, not columns).
movies = movies[['movie_id', 'title', 'genres', 'overview', 'cast']].dropna()

# Sanity checks: null count per column and number of fully duplicated rows.
# The results are not stored or printed, so they are only visible when the
# statements are evaluated in an interactive session.
(movies.isnull().sum())
(movies.duplicated().sum())
def convertGenres(obj):
    """Return the genre names held in a stringified list of genre records.

    Args:
        obj: String containing a Python list literal of dicts, each with a
            ``'name'`` key, e.g. ``'[{"id": 28, "name": "Action"}]'``.

    Returns:
        list: The ``'name'`` value of every record, in the original order.
        An empty list literal yields an empty list.

    Raises:
        ValueError, SyntaxError: If ``obj`` is not a valid Python literal
            (raised by ``ast.literal_eval``).
        KeyError: If a record has no ``'name'`` key.
    """
    # literal_eval only parses literals, so the cell text is never executed.
    return [genre['name'] for genre in ast.literal_eval(obj)]
# Replace each raw genres string with the list of genre names parsed from it.
movies['genres'] = movies['genres'].map(convertGenres)
def convertCast(obj, limit=3):
    """Return the names of the first ``limit`` entries of a stringified cast list.

    Args:
        obj: String containing a Python list literal of dicts, each with a
            ``'name'`` key, e.g. ``'[{"name": "Sam Worthington"}]'``.
        limit: Maximum number of names to return, taken from the start of
            the list. Must be non-negative. Defaults to 3.

    Returns:
        list: Up to ``limit`` ``'name'`` values, in the original order. Fewer
        are returned when the cast list is shorter than ``limit``.

    Raises:
        ValueError: If ``limit`` is negative, or if ``obj`` is not a valid
            Python literal (the latter raised by ``ast.literal_eval``).
        SyntaxError: If ``obj`` cannot be parsed (raised by
            ``ast.literal_eval``).
        KeyError: If one of the selected records has no ``'name'`` key.
    """
    if limit < 0:
        # A negative slice bound would silently drop entries from the end.
        raise ValueError("limit must be non-negative")
    # Slice before extracting so records past the limit are never inspected.
    return [member['name'] for member in ast.literal_eval(obj)[:limit]]
# Swap each raw cast string for the list of names convertCast extracts from it.
movies['cast'] = movies['cast'].apply(convertCast)

# Remove the spaces inside every genre name so each genre is a single token.
movies['genres'] = movies['genres'].apply(
    lambda names: [name.replace(" ", "") for name in names]
)

# Break the overview text into a list of whitespace-separated words.
movies['overview'] = movies['overview'].apply(lambda text: text.split())

# Movie_Info is the overview words followed by the cast names (row-wise list
# concatenation); genres are not merged in and stay in their own column.
movies['Movie_Info'] = movies['overview'] + movies['cast']

# Slimmed-down frame: identifiers, genres and the combined Movie_Info column;
# the separate overview and cast columns are left out.
new_df = movies[['movie_id', 'title', 'genres', 'Movie_Info']]

# Console display settings: output width in characters and the maximum number
# of columns shown when a frame is printed.
desired_width = 320
pd.options.display.width = desired_width
pd.options.display.max_columns = 10