Untitled.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "american-florida",
   "metadata": {},
   "source": [
    "Python Notebook\n",
    "This Python Notebook is Property of Premobowei Miriki and was created as part of a movie recommendation project.\n",
    "Creating a Movie Recommendation system using a Neural Collaborative Filtering approach\n",
    "I will be using the MovieLens 25M dataset, which consists of 25 million ratings applied to 62,000 movies by 162,000 users.\n",
    "The dataset can be found at https://grouplens.org/datasets/movielens/"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "overhead-arlington",
   "metadata": {},
   "source": [
    "# Load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dress-employee",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n",
      "/Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    }
   ],
   "source": [
    "# Load required modules\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "\n",
    "from keras.layers import Input, Embedding, Flatten, Dot, Dense, Multiply, Concatenate\n",
    "from keras.models import Model\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "eastern-thousand",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the KMP_DUPLICATE_LIB_OK environment variable to 'True' for this process.\n",
    "# Presumably a workaround for the abort raised when more than one OpenMP runtime is loaded\n",
    "# (seen with some conda builds) - TODO confirm it is still needed.\n",
    "# NOTE(review): this cell runs after the Keras import in the previous cell; confirm the ordering is intended.\n",
    "import os\n",
    "os.environ['KMP_DUPLICATE_LIB_OK']='True'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "funny-investor",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Read the ratings file into a DataFrame\n",
    "ratings_path = \"ratings.csv\"\n",
    "dataset = pd.read_csv(ratings_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "shaped-oxygen",
   "metadata": {},
   "source": [
    "# Data Pre-Processing"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "amended-mattress",
   "metadata": {},
   "source": [
    "## Data Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "median-retirement",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>296</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1147880044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>306</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1147868817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>307</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1147868828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>665</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1147878820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>899</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1147868510</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating   timestamp\n",
       "0       1      296     5.0  1147880044\n",
       "1       1      306     3.5  1147868817\n",
       "2       1      307     5.0  1147868828\n",
       "3       1      665     5.0  1147878820\n",
       "4       1      899     3.5  1147868510"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the ratings table\n",
    "dataset.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "comparative-campbell",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 25000095 entries, 0 to 25000094\n",
      "Data columns (total 4 columns):\n",
      " #   Column     Dtype  \n",
      "---  ------     -----  \n",
      " 0   userId     int64  \n",
      " 1   movieId    int64  \n",
      " 2   rating     float64\n",
      " 3   timestamp  int64  \n",
      "dtypes: float64(1), int64(3)\n",
      "memory usage: 762.9 MB\n"
     ]
    }
   ],
   "source": [
    "# Print a summary of the DataFrame: number of entries, column names, dtypes and memory usage.\n",
    "# Knowing the datatype of each field is helpful when collecting new user input.\n",
    "dataset.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "enclosed-making",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the rows whose user id is missing\n",
    "dataset['userId'].isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "elder-abuse",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the rows whose movie id is missing\n",
    "dataset['movieId'].isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "proud-antibody",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the rows whose rating is missing\n",
    "dataset['rating'].isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afraid-division",
   "metadata": {},
   "source": [
    "Since all the null checks come back as 0, every field in the data is filled in and the data is clean \n",
    "to use."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ignored-crisis",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lowest rating any user has given to a movie\n",
    "dataset.rating.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "aggregate-contrast",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Highest rating any user has given to a movie\n",
    "dataset.rating.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "clean-pottery",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "59047"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct movies that appear in the ratings data\n",
    "movies_len = dataset['movieId'].nunique()\n",
    "movies_len"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "correct-rainbow",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2001185          1\n",
       "10627899         1\n",
       "4075778          1\n",
       "19245863         1\n",
       "21816622         1\n",
       "             ...  \n",
       "18457961    209157\n",
       "17864443    209159\n",
       "1036618     209163\n",
       "18457962    209169\n",
       "18457963    209171\n",
       "Name: movieId, Length: 25000095, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sort the movie ids in ascending order; the tail of the output shows the largest id in the data\n",
    "dataset['movieId'].sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "permanent-general",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "162541"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct users that appear in the ratings data\n",
    "users_len = dataset['userId'].nunique()\n",
    "users_len"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bronze-savannah",
   "metadata": {},
   "source": [
    "Note: As seen in the analysis above, the ratings dataset has shape (25000095, 4), meaning it has 25,000,095 rows\n",
    "and 4 columns. The dataset contains 62,000 movies and 162,000 users. This means that the maximum number of ratings we could have is 10,044,000,000. However, as seen in the analysis above, we only have 25,000,095 ratings, which make up only about 0.25% of all the possible values. Therefore it is safe to say that the data is quite sparse."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "functioning-recycling",
   "metadata": {},
   "source": [
    "## Change rating scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "pleased-holly",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>296</td>\n",
       "      <td>1</td>\n",
       "      <td>1147880044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>306</td>\n",
       "      <td>1</td>\n",
       "      <td>1147868817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>307</td>\n",
       "      <td>1</td>\n",
       "      <td>1147868828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>665</td>\n",
       "      <td>1</td>\n",
       "      <td>1147878820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>899</td>\n",
       "      <td>1</td>\n",
       "      <td>1147868510</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating   timestamp\n",
       "0       1      296       1  1147880044\n",
       "1       1      306       1  1147868817\n",
       "2       1      307       1  1147868828\n",
       "3       1      665       1  1147878820\n",
       "4       1      899       1  1147868510"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For this project I have opted to change the rating values from the 0.5-5.0 star scale to a 0/1 label:\n",
    "# 1 when the rating is above 3, otherwise 0. Fewer target values make classification easier.\n",
    "# The comparison is vectorised instead of applying a Python lambda to each of the 25 million rows;\n",
    "# it yields booleans, which are then cast to 64-bit integers.\n",
    "# NOTE: this overwrites the original column, so run this cell only once per load of the data -\n",
    "# re-running it on the already converted values would turn every rating into 0.\n",
    "\n",
    "dataset[\"rating\"] = (dataset[\"rating\"] > 3).astype(\"int64\")\n",
    "dataset.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "amended-electricity",
   "metadata": {},
   "source": [
    "## Get Training and testing data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "aad9a034",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: scikit-learn in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (0.24.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from scikit-learn) (2.1.0)\n",
      "Requirement already satisfied: joblib>=0.11 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from scikit-learn) (1.0.1)\n",
      "Requirement already satisfied: numpy>=1.13.3 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from scikit-learn) (1.19.2)\n",
      "Requirement already satisfied: scipy>=0.19.1 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from scikit-learn) (1.5.2)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# Install/upgrade scikit-learn with the %pip magic so it targets the running kernel's environment\n",
    "%pip install -U scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "lined-invasion",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Randomly partition the ratings into a training set and a held-out test set with scikit-learn.\n",
    "# 35% of the rows go to the test set; the fixed random state (28) gives the same split on every run.\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "test_fraction = 0.35\n",
    "split_seed = 28\n",
    "train, test = train_test_split(dataset, random_state=split_seed, test_size=test_fraction)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "objective-outside",
   "metadata": {},
   "source": [
    "### Explore train and test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "great-columbia",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15558276</th>\n",
       "      <td>100737</td>\n",
       "      <td>30793</td>\n",
       "      <td>1</td>\n",
       "      <td>1246805953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7435598</th>\n",
       "      <td>48267</td>\n",
       "      <td>7143</td>\n",
       "      <td>1</td>\n",
       "      <td>1305372348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15963881</th>\n",
       "      <td>103498</td>\n",
       "      <td>48043</td>\n",
       "      <td>1</td>\n",
       "      <td>1294403268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17990831</th>\n",
       "      <td>116544</td>\n",
       "      <td>4262</td>\n",
       "      <td>1</td>\n",
       "      <td>1177293460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15962036</th>\n",
       "      <td>103487</td>\n",
       "      <td>2615</td>\n",
       "      <td>1</td>\n",
       "      <td>959990693</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          userId  movieId  rating   timestamp\n",
       "15558276  100737    30793       1  1246805953\n",
       "7435598    48267     7143       1  1305372348\n",
       "15963881  103498    48043       1  1294403268\n",
       "17990831  116544     4262       1  1177293460\n",
       "15962036  103487     2615       1   959990693"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the training set\n",
    "train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "impressive-jaguar",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10660080</th>\n",
       "      <td>69224</td>\n",
       "      <td>181545</td>\n",
       "      <td>1</td>\n",
       "      <td>1525157229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8479980</th>\n",
       "      <td>55195</td>\n",
       "      <td>132480</td>\n",
       "      <td>1</td>\n",
       "      <td>1446660403</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21694756</th>\n",
       "      <td>141028</td>\n",
       "      <td>3977</td>\n",
       "      <td>0</td>\n",
       "      <td>1061333069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13359258</th>\n",
       "      <td>86443</td>\n",
       "      <td>27846</td>\n",
       "      <td>1</td>\n",
       "      <td>1173055252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20095339</th>\n",
       "      <td>130627</td>\n",
       "      <td>1722</td>\n",
       "      <td>1</td>\n",
       "      <td>1218986817</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          userId  movieId  rating   timestamp\n",
       "10660080   69224   181545       1  1525157229\n",
       "8479980    55195   132480       1  1446660403\n",
       "21694756  141028     3977       0  1061333069\n",
       "13359258   86443    27846       1  1173055252\n",
       "20095339  130627     1722       1  1218986817"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the test set\n",
    "test.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "breeding-contribution",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(8750034, 4) (16250061, 4)\n"
     ]
    }
   ],
   "source": [
    "# Show the (rows, columns) shape of the test set followed by that of the training set\n",
    "test_shape, train_shape = test.shape, train.shape\n",
    "print(test_shape, train_shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "honest-triumph",
   "metadata": {},
   "source": [
    "# Model Creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "bizarre-breeding",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input layers\n",
    "# Each sample supplies a single id, so the movie input and the user input are both declared with shape [1]\n",
    "movie_input = Input(name='Movies', shape=[1])\n",
    "user_input = Input(name='Users', shape=[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "special-tractor",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define embedding layers\n",
    "# Where MF stands for Matrix Factorization; the *_mlp_* layers are a second, separate set of embeddings\n",
    "# (presumably for an MLP branch defined later in the notebook).\n",
    "# An Embedding is indexed directly by the raw id, so its input dimension has to be (largest id + 1)\n",
    "# rather than the number of distinct ids: the ratings contain 59,047 distinct movies but ids up to 209171.\n",
    "# The sizes are derived from the data instead of being hard-coded, so they stay valid if the ratings file changes.\n",
    "# Every embedding has an output dimension of 4.\n",
    "embedding_dim = 4\n",
    "movie_vocab_size = int(dataset['movieId'].max()) + 1\n",
    "user_vocab_size = int(dataset['userId'].max()) + 1\n",
    "\n",
    "movies_mf_embedding = Embedding(movie_vocab_size, embedding_dim, name='Embedded_Movies_MF', input_length=1)\n",
    "\n",
    "users_mf_embedding = Embedding(user_vocab_size, embedding_dim, name='Embedded_Users_MF', input_length=1)\n",
    "\n",
    "movies_mlp_embedding = Embedding(movie_vocab_size, embedding_dim, name='Embeded_Movies_MLP', input_length=1)\n",
    "\n",
    "users_mlp_embedding = Embedding(user_vocab_size, embedding_dim, name='Embeded_Users_MLP', input_length=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "crazy-bloom",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Matrix factorization function\n",
    "# Builds the matrix factorization part of the network: the movie and user embeddings are looked up,\n",
    "# flattened, and combined with a dot product that is used as the predicted rating.\n",
    "def MatrixFactorizationNN(movies_embedding_MF, users_embeding_MF):\n",
    "    \"\"\"Return the dot product of the flattened movie and user embeddings.\n",
    "\n",
    "    movies_embedding_MF -- embedding layer applied to the notebook-level `movie_input`\n",
    "    users_embeding_MF   -- embedding layer applied to the notebook-level `user_input`\n",
    "\n",
    "    The layers passed in are the ones that get used. The previous version ignored both\n",
    "    parameters and silently read the notebook-level embedding variables instead.\n",
    "    \"\"\"\n",
    "    movies_mf_flatten = Flatten(name = 'Flatten_Movies_MF')(movies_embedding_MF(movie_input))\n",
    "    users_mf_flatten = Flatten(name = 'Flatten_Users_MF')(users_embeding_MF(user_input))\n",
    "    # axes = 1 contracts the embedding dimension, leaving one value per sample\n",
    "    matrix_vect = Dot(name = 'Dot_Product_MF', axes = 1)([movies_mf_flatten, users_mf_flatten])\n",
    "    return matrix_vect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "stone-certificate",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "Movies (InputLayer)             (None, 1)            0                                            \n",
      "__________________________________________________________________________________________________\n",
      "Users (InputLayer)              (None, 1)            0                                            \n",
      "__________________________________________________________________________________________________\n",
      "Embedded_Movies_MF (Embedding)  (None, 1, 4)         836688      Movies[0][0]                     \n",
      "__________________________________________________________________________________________________\n",
      "Embedded_Users_MF (Embedding)   (None, 1, 4)         650168      Users[0][0]                      \n",
      "__________________________________________________________________________________________________\n",
      "Flatten_Movies_MF (Flatten)     (None, 4)            0           Embedded_Movies_MF[0][0]         \n",
      "__________________________________________________________________________________________________\n",
      "Flatten_Users_MF (Flatten)      (None, 4)            0           Embedded_Users_MF[0][0]          \n",
      "__________________________________________________________________________________________________\n",
      "Dot_Product_MF (Dot)            (None, 1)            0           Flatten_Movies_MF[0][0]          \n",
      "                                                                 Flatten_Users_MF[0][0]           \n",
      "==================================================================================================\n",
      "Total params: 1,486,856\n",
      "Trainable params: 1,486,856\n",
      "Non-trainable params: 0\n",
      "__________________________________________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Build a model whose output is the matrix factorization dot product, compile it and print its layers\n",
    "mf_output = MatrixFactorizationNN(movies_mf_embedding, users_mf_embedding)\n",
    "model = Model(inputs=[movie_input, user_input], outputs=mf_output)\n",
    "model.compile(loss='mean_squared_error', optimizer='adam')\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "9ba2f50e",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: GraphViz in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (0.16)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# Install the graphviz package with the %pip magic so it targets the running kernel's environment\n",
    "%pip install GraphViz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "ae6714cc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<svg height=\"304pt\" viewBox=\"0.00 0.00 745.50 304.00\" width=\"746pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g class=\"graph\" id=\"graph0\" transform=\"scale(1 1) rotate(0) translate(4 300)\">\n",
       "<title>G</title>\n",
       "<polygon fill=\"white\" points=\"-4,4 -4,-300 741.5,-300 741.5,4 -4,4\" stroke=\"transparent\"/>\n",
       "<!-- 140679661934688 -->\n",
       "<g class=\"node\" id=\"node1\">\n",
       "<title>140679661934688</title>\n",
       "<polygon fill=\"none\" points=\"55.5,-249.5 55.5,-295.5 309.5,-295.5 309.5,-249.5 55.5,-249.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"119.5\" y=\"-268.8\">Movies: InputLayer</text>\n",
       "<polyline fill=\"none\" points=\"183.5,-249.5 183.5,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"211.5\" y=\"-280.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"183.5,-272.5 239.5,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"211.5\" y=\"-257.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"239.5,-249.5 239.5,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"274.5\" y=\"-280.3\">(None, 1)</text>\n",
       "<polyline fill=\"none\" points=\"239.5,-272.5 309.5,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"274.5\" y=\"-257.3\">(None, 1)</text>\n",
       "</g>\n",
       "<!-- 140679662064920 -->\n",
       "<g class=\"node\" id=\"node3\">\n",
       "<title>140679662064920</title>\n",
       "<polygon fill=\"none\" points=\"0,-166.5 0,-212.5 365,-212.5 365,-166.5 0,-166.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"112.5\" y=\"-185.8\">Embedded_Movies_MF: Embedding</text>\n",
       "<polyline fill=\"none\" points=\"225,-166.5 225,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"253\" y=\"-197.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"225,-189.5 281,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"253\" y=\"-174.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"281,-166.5 281,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"323\" y=\"-197.3\">(None, 1)</text>\n",
       "<polyline fill=\"none\" points=\"281,-189.5 365,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"323\" y=\"-174.3\">(None, 1, 4)</text>\n",
       "</g>\n",
       "<!-- 140679661934688&#45;&gt;140679662064920 -->\n",
       "<g class=\"edge\" id=\"edge1\">\n",
       "<title>140679661934688-&gt;140679662064920</title>\n",
       "<path d=\"M182.5,-249.37C182.5,-241.15 182.5,-231.66 182.5,-222.73\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"186,-222.61 182.5,-212.61 179,-222.61 186,-222.61\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 140679661933176 -->\n",
       "<g class=\"node\" id=\"node2\">\n",
       "<title>140679661933176</title>\n",
       "<polygon fill=\"none\" points=\"438.5,-249.5 438.5,-295.5 682.5,-295.5 682.5,-249.5 438.5,-249.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"497.5\" y=\"-268.8\">Users: InputLayer</text>\n",
       "<polyline fill=\"none\" points=\"556.5,-249.5 556.5,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"584.5\" y=\"-280.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"556.5,-272.5 612.5,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"584.5\" y=\"-257.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"612.5,-249.5 612.5,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"647.5\" y=\"-280.3\">(None, 1)</text>\n",
       "<polyline fill=\"none\" points=\"612.5,-272.5 682.5,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"647.5\" y=\"-257.3\">(None, 1)</text>\n",
       "</g>\n",
       "<!-- 140679662064864 -->\n",
       "<g class=\"node\" id=\"node4\">\n",
       "<title>140679662064864</title>\n",
       "<polygon fill=\"none\" points=\"383.5,-166.5 383.5,-212.5 737.5,-212.5 737.5,-166.5 383.5,-166.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"490.5\" y=\"-185.8\">Embedded_Users_MF: Embedding</text>\n",
       "<polyline fill=\"none\" points=\"597.5,-166.5 597.5,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"625.5\" y=\"-197.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"597.5,-189.5 653.5,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"625.5\" y=\"-174.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"653.5,-166.5 653.5,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"695.5\" y=\"-197.3\">(None, 1)</text>\n",
       "<polyline fill=\"none\" points=\"653.5,-189.5 737.5,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"695.5\" y=\"-174.3\">(None, 1, 4)</text>\n",
       "</g>\n",
       "<!-- 140679661933176&#45;&gt;140679662064864 -->\n",
       "<g class=\"edge\" id=\"edge2\">\n",
       "<title>140679661933176-&gt;140679662064864</title>\n",
       "<path d=\"M560.5,-249.37C560.5,-241.15 560.5,-231.66 560.5,-222.73\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"564,-222.61 560.5,-212.61 557,-222.61 564,-222.61\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 140679661934240 -->\n",
       "<g class=\"node\" id=\"node5\">\n",
       "<title>140679661934240</title>\n",
       "<polygon fill=\"none\" points=\"46.5,-83.5 46.5,-129.5 364.5,-129.5 364.5,-83.5 46.5,-83.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"135.5\" y=\"-102.8\">Flatten_Movies_MF: Flatten</text>\n",
       "<polyline fill=\"none\" points=\"224.5,-83.5 224.5,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"252.5\" y=\"-114.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"224.5,-106.5 280.5,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"252.5\" y=\"-91.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"280.5,-83.5 280.5,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"322.5\" y=\"-114.3\">(None, 1, 4)</text>\n",
       "<polyline fill=\"none\" points=\"280.5,-106.5 364.5,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"322.5\" y=\"-91.3\">(None, 4)</text>\n",
       "</g>\n",
       "<!-- 140679662064920&#45;&gt;140679661934240 -->\n",
       "<g class=\"edge\" id=\"edge3\">\n",
       "<title>140679662064920-&gt;140679661934240</title>\n",
       "<path d=\"M188.78,-166.37C191.14,-158.06 193.87,-148.45 196.43,-139.43\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"199.86,-140.18 199.22,-129.61 193.12,-138.27 199.86,-140.18\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 140679661935528 -->\n",
       "<g class=\"node\" id=\"node6\">\n",
       "<title>140679661935528</title>\n",
       "<polygon fill=\"none\" points=\"394.5,-83.5 394.5,-129.5 702.5,-129.5 702.5,-83.5 394.5,-83.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"478.5\" y=\"-102.8\">Flatten_Users_MF: Flatten</text>\n",
       "<polyline fill=\"none\" points=\"562.5,-83.5 562.5,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"590.5\" y=\"-114.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"562.5,-106.5 618.5,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"590.5\" y=\"-91.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"618.5,-83.5 618.5,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"660.5\" y=\"-114.3\">(None, 1, 4)</text>\n",
       "<polyline fill=\"none\" points=\"618.5,-106.5 702.5,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"660.5\" y=\"-91.3\">(None, 4)</text>\n",
       "</g>\n",
       "<!-- 140679662064864&#45;&gt;140679661935528 -->\n",
       "<g class=\"edge\" id=\"edge4\">\n",
       "<title>140679662064864-&gt;140679661935528</title>\n",
       "<path d=\"M557.22,-166.37C556,-158.15 554.6,-148.66 553.27,-139.73\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"556.7,-138.99 551.78,-129.61 549.78,-140.01 556.7,-138.99\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 140679661935696 -->\n",
       "<g class=\"node\" id=\"node7\">\n",
       "<title>140679661935696</title>\n",
       "<polygon fill=\"none\" points=\"200.5,-0.5 200.5,-46.5 540.5,-46.5 540.5,-0.5 200.5,-0.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"272.5\" y=\"-19.8\">Dot_Product_MF: Dot</text>\n",
       "<polyline fill=\"none\" points=\"344.5,-0.5 344.5,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"372.5\" y=\"-31.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"344.5,-23.5 400.5,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"372.5\" y=\"-8.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"400.5,-0.5 400.5,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"470.5\" y=\"-31.3\">[(None, 4), (None, 4)]</text>\n",
       "<polyline fill=\"none\" points=\"400.5,-23.5 540.5,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"470.5\" y=\"-8.3\">(None, 1)</text>\n",
       "</g>\n",
       "<!-- 140679661934240&#45;&gt;140679661935696 -->\n",
       "<g class=\"edge\" id=\"edge5\">\n",
       "<title>140679661934240-&gt;140679661935696</title>\n",
       "<path d=\"M250.59,-83.37C270.87,-73.41 294.97,-61.58 316.24,-51.14\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"318.03,-54.16 325.47,-46.61 314.95,-47.87 318.03,-54.16\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 140679661935528&#45;&gt;140679661935696 -->\n",
       "<g class=\"edge\" id=\"edge6\">\n",
       "<title>140679661935528-&gt;140679661935696</title>\n",
       "<path d=\"M499.86,-83.37C477.79,-73.32 451.51,-61.36 428.42,-50.86\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"429.63,-47.56 419.08,-46.61 426.73,-53.93 429.63,-47.56\" stroke=\"black\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "import keras\n",
    "from IPython.display import SVG\n",
    "from keras.optimizers import Adam\n",
    "from keras.utils.vis_utils import model_to_dot\n",
    "SVG(model_to_dot(model,  show_shapes=True, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "legislative-vault",
   "metadata": {},
   "source": [
    "# Train the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "abandoned-myrtle",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Beacsue the raining process take a long time especially when dealing with a large dataset such as this one \n",
    "# I used the load model from keras to store the trained model as a .h5 file and load it when needed\n",
    "\n",
    "from keras.models import load_model\n",
    "\n",
    "if os.path.exists('recommender_model2.h5'):\n",
    "    model = load_model('recommender_model2.h5')\n",
    "else:\n",
    "    history = model.fit([train.movieId, train.userId], train.rating, epochs= 4, verbose=1)\n",
    "    model.save('recommender_model2.h5')\n",
    "    plt.plot(history.history['loss'])\n",
    "    plt.xlabel = ('Epochs')\n",
    "    plt.ylabel = (\"Training_Error\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "phantom-louisville",
   "metadata": {},
   "source": [
    "# Evaluate the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "excess-quantity",
   "metadata": {},
   "outputs": [],
   "source": [
    "#loss, accuracy = model.evaluate([test.movieId, test.userId], test.rating)\n",
    "y_hat = np.round(model.predict([test.movieId, test.userId]),0)\n",
    "y_true = test.rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "2fa542d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2689119836562921"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import mean_absolute_error\n",
    "mean_absolute_error(y_true, y_hat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "otherwise-creativity",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.7927092] 1\n",
      "[1.1115079] 1\n",
      "[0.0020586] 0\n",
      "[1.0242506] 1\n",
      "[0.22690105] 1\n",
      "[0.50635934] 1\n",
      "[0.65502155] 0\n",
      "[0.37845236] 0\n",
      "[0.7152109] 1\n",
      "[0.07030999] 0\n",
      "[0.6284604] 0\n",
      "[-0.17061862] 0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[None, None, None, None, None, None, None, None, None, None, None, None]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions = model.predict([test.movieId.head(12), test.userId.head(12)])\n",
    "[print(predictions[i], test.rating.iloc[i]) for i in range (0,12)]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "average-estonia",
   "metadata": {},
   "source": [
    "### Mean Squared Error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "antique-complement",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_hat = np.round(model.predict([test.movieId, test.userId]),0)\n",
    "y_true = test.rating"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "perfect-tension",
   "metadata": {},
   "source": [
    "# Make recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "following-appendix",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([131072,      1,      2, ..., 131066, 131068, 131070])"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "moviedata = np.array(list(set(dataset.movieId)))\n",
    "moviedata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "economic-aruba",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([10, 10, 10, ..., 10, 10, 10])"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user = np.array([10 for i in range(len(moviedata))])\n",
    "user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "documentary-coating",
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = model.predict([moviedata, user])\n",
    "predictions = np.array([a[0] for a in predictions])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "single-harbor",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([40511, 13673, 49893, 35302, 58824, 37541, 42699, 28908, 34748,\n",
       "       50747])"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommendedmovies = (predictions).argsort()[:10]\n",
    "recommendedmovies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "recovered-snapshot",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies = pd.read_csv(\"movies.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "italic-allowance",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [movieId, title, genres]\n",
       "Index: []"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[movies['movieId'].isin (recommendedmovies)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "opposite-police",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[   10    11    17    32    34    36    39    47    50    95   110   111\n",
      "   141   145   150   153   161   165   185   215   253   260   277   288\n",
      "   292   296   300   318   339   344   345   349   356   357   364   372\n",
      "   377   380   412   434   454   457   474   480   500   527   539   541\n",
      "   586   587   588   589   590   592   593   595   597   608   647   648\n",
      "   733   736   750   780   788   838   858   899   909   912   924   926\n",
      "   932   940   965   969  1036  1078  1079  1089  1092  1097  1101  1172\n",
      "  1183  1193  1196  1198  1200  1203  1210  1213  1214  1220  1221  1225\n",
      "  1230  1240  1246  1258  1259  1265  1269  1270  1287  1291  1292  1307\n",
      "  1339  1377  1387  1391  1393  1453  1459  1479  1485  1517  1541  1573\n",
      "  1580  1608  1610  1617  1625  1641  1653  1693  1694  1704  1721  1784\n",
      "  1917  1923  1939  1953  1961  1968  2000  2002  2011  2012  2018  2028\n",
      "  2072  2081  2115  2133  2141  2174  2248  2262  2314  2321  2329  2353\n",
      "  2384  2396  2406  2427  2445  2502  2505  2520  2521  2522  2539  2571\n",
      "  2617  2628  2640  2683  2706  2707  2710  2712  2723  2731  2753  2762\n",
      "  2791  2797  2819  2858  2870  2871  2915  2918  2959  2967  2987  2997\n",
      "  3004  3081  3105  3107  3147  3175  3206  3251  3253  3255  3364  3370\n",
      "  3401  3408  3420  3471  3481  3525  3534  3578  3599  3623  3649  3736\n",
      "  3742  3751  3755  3793  3801  3824  3841  3897  3911  3948  3977  3994\n",
      "  3996  4022  4027  4031  4034  4080  4084  4085  4159  4167  4174  4191\n",
      "  4226  4238  4246  4305  4306  4370  4392  4393  4419  4462  4464  4465\n",
      "  4474  4564  4639  4844  4963  4973  4979  4995  5049  5096  5114  5151\n",
      "  5226  5299  5349  5377  5404  5418  5445  5459  5472  5508  5581  5610\n",
      "  5618  5669  5677  5680  5707  5902  5945  5954  5960  5989  6218  6254\n",
      "  6287  6333  6377  6378  6440  6493  6502  6539  6548  6573  6586  6620\n",
      "  6711  6750  6787  6874  6953  6954  6957  7121  7147  7173  7293  7315\n",
      "  7361  7438  7444  8094  8120  8360  8464  8638  8665  8695  8784  8838\n",
      "  8874  8943  8948  8949  8961  8968  8973 25868 26078 27317 27724 30812\n",
      " 32584 32587 33166 33794 33836 34048 34338 35836 42938 43936 44191 44195\n",
      " 44759 45106 46578 46723 47640 48304 48326 48394 48516 48774 48780 49007\n",
      " 49272 50514 50802 50923 51255 51471 51662 51834 52967 53953 54286 54503\n",
      " 54997 55052 55069 55363 55765 55805 55820 55830 56367 56949 57243 58295\n",
      " 58559 58803 59258 59315 59910 60069 60103 61986 63082 63853 64957 66665\n",
      " 69243 70932 71460]\n"
     ]
    }
   ],
   "source": [
    "relationship = dataset.loc[dataset['userId'] == 31]\n",
    "relationship = relationship['movieId'].to_numpy()\n",
    "print(relationship)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "perceived-devil",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>GoldenEye (1995)</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>American President, The (1995)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>Sense and Sensibility (1995)</td>\n",
       "      <td>Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>Twelve Monkeys (a.k.a. 12 Monkeys) (1995)</td>\n",
       "      <td>Mystery|Sci-Fi|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>Babe (1995)</td>\n",
       "      <td>Children|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12901</th>\n",
       "      <td>64957</td>\n",
       "      <td>Curious Case of Benjamin Button, The (2008)</td>\n",
       "      <td>Drama|Fantasy|Mystery|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13103</th>\n",
       "      <td>66665</td>\n",
       "      <td>Away We Go (2009)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13402</th>\n",
       "      <td>69243</td>\n",
       "      <td>Before the Rains (2007)</td>\n",
       "      <td>Drama|Romance|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13694</th>\n",
       "      <td>70932</td>\n",
       "      <td>My Life in Ruins (2009)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13804</th>\n",
       "      <td>71460</td>\n",
       "      <td>Wanted (2009)</td>\n",
       "      <td>Action|Romance</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>399 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       movieId                                        title  \\\n",
       "9           10                             GoldenEye (1995)   \n",
       "10          11               American President, The (1995)   \n",
       "16          17                 Sense and Sensibility (1995)   \n",
       "31          32    Twelve Monkeys (a.k.a. 12 Monkeys) (1995)   \n",
       "33          34                                  Babe (1995)   \n",
       "...        ...                                          ...   \n",
       "12901    64957  Curious Case of Benjamin Button, The (2008)   \n",
       "13103    66665                            Away We Go (2009)   \n",
       "13402    69243                      Before the Rains (2007)   \n",
       "13694    70932                      My Life in Ruins (2009)   \n",
       "13804    71460                                Wanted (2009)   \n",
       "\n",
       "                              genres  \n",
       "9          Action|Adventure|Thriller  \n",
       "10              Comedy|Drama|Romance  \n",
       "16                     Drama|Romance  \n",
       "31           Mystery|Sci-Fi|Thriller  \n",
       "33                    Children|Drama  \n",
       "...                              ...  \n",
       "12901  Drama|Fantasy|Mystery|Romance  \n",
       "13103           Comedy|Drama|Romance  \n",
       "13402         Drama|Romance|Thriller  \n",
       "13694                         Comedy  \n",
       "13804                 Action|Romance  \n",
       "\n",
       "[399 rows x 3 columns]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[movies['movieId'].isin (relationship)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "diagnostic-damage",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: coremltools in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (4.1)\n",
      "Requirement already satisfied: six>=1.10.0 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (1.15.0)\n",
      "Requirement already satisfied: scipy in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (1.5.2)\n",
      "Requirement already satisfied: numpy<1.20,>=1.14.5 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (1.19.2)\n",
      "Requirement already satisfied: attr in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (0.3.1)\n",
      "Requirement already satisfied: protobuf>=3.1.0 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (3.14.0)\n",
      "Requirement already satisfied: attrs in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (20.3.0)\n",
      "Requirement already satisfied: tqdm in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (4.59.0)\n",
      "Requirement already satisfied: sympy in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (1.8)\n",
      "Requirement already satisfied: packaging in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from coremltools) (20.9)\n",
      "Requirement already satisfied: pyparsing>=2.0.2 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from packaging->coremltools) (2.4.7)\n",
      "Requirement already satisfied: mpmath>=0.19 in /Users/premoboweimiriki/opt/anaconda3/envs/RecSys2/lib/python3.6/site-packages (from sympy->coremltools) (1.2.1)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install coremltools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "noted-stations",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:root:scikit-learn version 0.24.2 is not supported. Minimum required version: 0.17. Maximum required version: 0.19.2. Disabling scikit-learn conversion API.\n"
     ]
    }
   ],
   "source": [
    "from keras.models import load_model\n",
    "import coremltools"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94316417",
   "metadata": {},
   "source": [
    "### Convert model to CoreML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32aaa29b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import coremltools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f10cd7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name the Core ML features explicitly so the descriptions below refer to features that exist.\n",
    "# The input order follows the order used for model.fit / model.predict: [movieId, userId].\n",
    "coreml_model = coremltools.converters.keras.convert(\n",
    "    model,\n",
    "    input_names=['movieId', 'userId'],\n",
    "    output_names='rating',\n",
    ")\n",
    "\n",
    "# Metadata is set on the converted model before saving so it is written into the .mlmodel file.\n",
    "coreml_model.author = 'Premo Miriki'\n",
    "coreml_model.short_description = 'Movie Recommendation with Movielens'\n",
    "coreml_model.input_description['movieId'] = 'MovieLens movie id'\n",
    "coreml_model.input_description['userId'] = 'MovieLens user id'\n",
    "coreml_model.output_description['rating'] = 'Predicted rating for the (movie, user) pair'\n",
    "coreml_model.save('recSys.mlmodel')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea5b60f4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}