Skip to content
Permalink
Browse files
Add files via upload
  • Loading branch information
nafaya committed Nov 19, 2021
1 parent 9b2f37f commit 73d96b17838782c7d5b82236dc3ae4aedb8f5e50
Show file tree
Hide file tree
Showing 2 changed files with 4,851 additions and 0 deletions.
@@ -0,0 +1,47 @@
import numpy as np
import pandas as pd
import ast
# Load the two TMDB CSV exports; both paths are relative to the working directory.
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Merge movies and credits into a single frame, joined on the shared 'title' column.
# NOTE(review): if titles are not unique in either file, this join yields one
# row per matching pair and inflates the row count -- confirm whether a unique
# ID column should be used as the key instead.
movies = movies.merge(credits, on='title')

# Columns kept for the rest of the script:
# title, overview, genres, original language, cast
movies = movies[['title', 'overview', 'genres', 'original_language', 'cast']]

# Drop every ROW that has a null in any of the kept columns.
# Reassigning (rather than inplace=True) avoids mutating a frame that was
# derived by column selection, which can raise SettingWithCopyWarning on some
# pandas versions.
movies = movies.dropna()

# Sanity checks: per-column null counts and the number of fully duplicated
# rows. Stored in variables rather than printed so the script's output is
# unchanged; as bare expressions their results would simply be discarded.
null_counts = movies.isnull().sum()
duplicate_rows = movies.duplicated().sum()

def convertGenres(obj):
    """Return the genre names stored in a stringified list of dicts.

    Args:
        obj: String containing a list literal whose items are dicts with a
            ``'name'`` key, e.g. ``'[{"name": "Action"}]'``.

    Returns:
        list: The ``'name'`` value of every item, in the original order.
        An empty list literal yields ``[]``.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``obj`` is not a valid Python literal.
        KeyError: If an item has no ``'name'`` key.
    """
    # ast.literal_eval only evaluates literals, so code embedded in a CSV
    # cell is never executed.
    return [genre['name'] for genre in ast.literal_eval(obj)]

# Replace each stringified genre list with a plain list of genre names.
movies = movies.assign(genres=movies['genres'].apply(convertGenres))

def convertCast(obj, limit=3):
    """Return the names of the first ``limit`` entries of a stringified cast list.

    Args:
        obj: String containing a list literal whose items are dicts with a
            ``'name'`` key, e.g. ``'[{"name": "Sam Worthington"}]'``.
        limit: Maximum number of names to return, taken from the start of
            the list. Expected to be a non-negative int; defaults to 3.

    Returns:
        list: Up to ``limit`` names in their original order. Fewer are
        returned when the list is shorter; entries past ``limit`` are never
        inspected.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``obj`` is not a valid Python literal.
        KeyError: If one of the first ``limit`` items has no ``'name'`` key.
    """
    # Slice before reading names so that only the leading entries are touched.
    leading_members = ast.literal_eval(obj)[:limit]
    return [member['name'] for member in leading_members]
# Replace each stringified cast list with the names convertCast extracts from it.
movies = movies.assign(cast=movies['cast'].apply(convertCast))

# Show the prepared frame.
print(movies)








0 comments on commit 73d96b1

Please sign in to comment.