{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Факторизация матриц с SVD\n",
    "\n",
    "$$\\begin{equation}\n",
    "R = U\\Sigma V^{T}\n",
    "\\end{equation}$$\n",
    "\n",
    "где R это матрица рейтингов юзеров, $U$ это матрица фичей юзеров, $\\Sigma$ диагональная матрица сингулярных значений (весов), и $V^{T}$ матрица фичей фильмов. $U$ и $V^{T}$ ортогональны. $U$ показывает скольким юзерам нравится фича и $V^{T}$ показывает как фича относится(релевантна) фильму.\n",
    "\n",
    "Чтобы получить низкоранговую аппроксимацию, мы берем эти матрицы и оставляем только топ $k$ фичей, которые отражают скрытые предпочтения и вкусы. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "ratings_list = [i.strip().split(\"::\") for i in open('/users/muzalevskiy/Downloads/ml-1m/ratings.dat', 'r').readlines()]\n",
    "users_list = [i.strip().split(\"::\") for i in open('/users/muzalevskiy/Downloads/ml-1m/users.dat', 'r').readlines()]\n",
    "movies_list = [i.strip().split(\"::\") for i in open('/users/muzalevskiy/Downloads/ml-1m/movies.dat', 'r').readlines()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ratings = np.array(ratings_list)\n",
    "users = np.array(users_list)\n",
    "movies = np.array(movies_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ratings_df = pd.DataFrame(ratings_list, columns = ['UserID', 'MovieID', 'Rating', 'Timestamp'], dtype = int)\n",
    "movies_df = pd.DataFrame(movies_list, columns = ['MovieID', 'Title', 'Genres'])\n",
    "movies_df['MovieID'] = movies_df['MovieID'].apply(pd.to_numeric)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Title</th>\n",
       "      <th>Genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Animation|Children's|Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children's|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   MovieID                               Title                        Genres\n",
       "0        1                    Toy Story (1995)   Animation|Children's|Comedy\n",
       "1        2                      Jumanji (1995)  Adventure|Children's|Fantasy\n",
       "2        3             Grumpier Old Men (1995)                Comedy|Romance\n",
       "3        4            Waiting to Exhale (1995)                  Comedy|Drama\n",
       "4        5  Father of the Bride Part II (1995)                        Comedy"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1193</td>\n",
       "      <td>5</td>\n",
       "      <td>978300760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>661</td>\n",
       "      <td>3</td>\n",
       "      <td>978302109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>914</td>\n",
       "      <td>3</td>\n",
       "      <td>978301968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3408</td>\n",
       "      <td>4</td>\n",
       "      <td>978300275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>2355</td>\n",
       "      <td>5</td>\n",
       "      <td>978824291</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   UserID  MovieID  Rating  Timestamp\n",
       "0       1     1193       5  978300760\n",
       "1       1      661       3  978302109\n",
       "2       1      914       3  978301968\n",
       "3       1     3408       4  978300275\n",
       "4       1     2355       5  978824291"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>MovieID</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>3943</th>\n",
       "      <th>3944</th>\n",
       "      <th>3945</th>\n",
       "      <th>3946</th>\n",
       "      <th>3947</th>\n",
       "      <th>3948</th>\n",
       "      <th>3949</th>\n",
       "      <th>3950</th>\n",
       "      <th>3951</th>\n",
       "      <th>3952</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UserID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 3706 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "MovieID  1     2     3     4     5     6     7     8     9     10    ...   \\\n",
       "UserID                                                               ...    \n",
       "1         5.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    \n",
       "2         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    \n",
       "3         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    \n",
       "4         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    \n",
       "5         0.0   0.0   0.0   0.0   0.0   2.0   0.0   0.0   0.0   0.0  ...    \n",
       "\n",
       "MovieID  3943  3944  3945  3946  3947  3948  3949  3950  3951  3952  \n",
       "UserID                                                               \n",
       "1         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "2         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "3         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "4         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "5         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "\n",
       "[5 rows x 3706 columns]"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "R_df = ratings_df.pivot(index = 'UserID', columns ='MovieID', values = 'Rating').fillna(0)\n",
    "R_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "R = R_df.as_matrix()\n",
    "user_ratings_mean = np.mean(R, axis = 1)\n",
    "R_demeaned = R - user_ratings_mean.reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.sparse.linalg import svds\n",
    "U, sigma, Vt = svds(R_demeaned, k = 50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sigma = np.diag(sigma)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>MovieID</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>3943</th>\n",
       "      <th>3944</th>\n",
       "      <th>3945</th>\n",
       "      <th>3946</th>\n",
       "      <th>3947</th>\n",
       "      <th>3948</th>\n",
       "      <th>3949</th>\n",
       "      <th>3950</th>\n",
       "      <th>3951</th>\n",
       "      <th>3952</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.288861</td>\n",
       "      <td>0.143055</td>\n",
       "      <td>-0.195080</td>\n",
       "      <td>-0.018843</td>\n",
       "      <td>0.012232</td>\n",
       "      <td>-0.176604</td>\n",
       "      <td>-0.074120</td>\n",
       "      <td>0.141358</td>\n",
       "      <td>-0.059553</td>\n",
       "      <td>-0.195950</td>\n",
       "      <td>...</td>\n",
       "      <td>0.027807</td>\n",
       "      <td>0.001640</td>\n",
       "      <td>0.026395</td>\n",
       "      <td>-0.022024</td>\n",
       "      <td>-0.085415</td>\n",
       "      <td>0.403529</td>\n",
       "      <td>0.105579</td>\n",
       "      <td>0.031912</td>\n",
       "      <td>0.050450</td>\n",
       "      <td>0.088910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.744716</td>\n",
       "      <td>0.169659</td>\n",
       "      <td>0.335418</td>\n",
       "      <td>0.000758</td>\n",
       "      <td>0.022475</td>\n",
       "      <td>1.353050</td>\n",
       "      <td>0.051426</td>\n",
       "      <td>0.071258</td>\n",
       "      <td>0.161601</td>\n",
       "      <td>1.567246</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.056502</td>\n",
       "      <td>-0.013733</td>\n",
       "      <td>-0.010580</td>\n",
       "      <td>0.062576</td>\n",
       "      <td>-0.016248</td>\n",
       "      <td>0.155790</td>\n",
       "      <td>-0.418737</td>\n",
       "      <td>-0.101102</td>\n",
       "      <td>-0.054098</td>\n",
       "      <td>-0.140188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.818824</td>\n",
       "      <td>0.456136</td>\n",
       "      <td>0.090978</td>\n",
       "      <td>-0.043037</td>\n",
       "      <td>-0.025694</td>\n",
       "      <td>-0.158617</td>\n",
       "      <td>-0.131778</td>\n",
       "      <td>0.098977</td>\n",
       "      <td>0.030551</td>\n",
       "      <td>0.735470</td>\n",
       "      <td>...</td>\n",
       "      <td>0.040481</td>\n",
       "      <td>-0.005301</td>\n",
       "      <td>0.012832</td>\n",
       "      <td>0.029349</td>\n",
       "      <td>0.020866</td>\n",
       "      <td>0.121532</td>\n",
       "      <td>0.076205</td>\n",
       "      <td>0.012345</td>\n",
       "      <td>0.015148</td>\n",
       "      <td>-0.109956</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.408057</td>\n",
       "      <td>-0.072960</td>\n",
       "      <td>0.039642</td>\n",
       "      <td>0.089363</td>\n",
       "      <td>0.041950</td>\n",
       "      <td>0.237753</td>\n",
       "      <td>-0.049426</td>\n",
       "      <td>0.009467</td>\n",
       "      <td>0.045469</td>\n",
       "      <td>-0.111370</td>\n",
       "      <td>...</td>\n",
       "      <td>0.008571</td>\n",
       "      <td>-0.005425</td>\n",
       "      <td>-0.008500</td>\n",
       "      <td>-0.003417</td>\n",
       "      <td>-0.083982</td>\n",
       "      <td>0.094512</td>\n",
       "      <td>0.057557</td>\n",
       "      <td>-0.026050</td>\n",
       "      <td>0.014841</td>\n",
       "      <td>-0.034224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.574272</td>\n",
       "      <td>0.021239</td>\n",
       "      <td>-0.051300</td>\n",
       "      <td>0.246884</td>\n",
       "      <td>-0.032406</td>\n",
       "      <td>1.552281</td>\n",
       "      <td>-0.199630</td>\n",
       "      <td>-0.014920</td>\n",
       "      <td>-0.060498</td>\n",
       "      <td>0.450512</td>\n",
       "      <td>...</td>\n",
       "      <td>0.110151</td>\n",
       "      <td>0.046010</td>\n",
       "      <td>0.006934</td>\n",
       "      <td>-0.015940</td>\n",
       "      <td>-0.050080</td>\n",
       "      <td>-0.052539</td>\n",
       "      <td>0.507189</td>\n",
       "      <td>0.033830</td>\n",
       "      <td>0.125706</td>\n",
       "      <td>0.199244</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 3706 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "MovieID      1         2         3         4         5         6         7     \\\n",
       "0        4.288861  0.143055 -0.195080 -0.018843  0.012232 -0.176604 -0.074120   \n",
       "1        0.744716  0.169659  0.335418  0.000758  0.022475  1.353050  0.051426   \n",
       "2        1.818824  0.456136  0.090978 -0.043037 -0.025694 -0.158617 -0.131778   \n",
       "3        0.408057 -0.072960  0.039642  0.089363  0.041950  0.237753 -0.049426   \n",
       "4        1.574272  0.021239 -0.051300  0.246884 -0.032406  1.552281 -0.199630   \n",
       "\n",
       "MovieID      8         9         10      ...         3943      3944      3945  \\\n",
       "0        0.141358 -0.059553 -0.195950    ...     0.027807  0.001640  0.026395   \n",
       "1        0.071258  0.161601  1.567246    ...    -0.056502 -0.013733 -0.010580   \n",
       "2        0.098977  0.030551  0.735470    ...     0.040481 -0.005301  0.012832   \n",
       "3        0.009467  0.045469 -0.111370    ...     0.008571 -0.005425 -0.008500   \n",
       "4       -0.014920 -0.060498  0.450512    ...     0.110151  0.046010  0.006934   \n",
       "\n",
       "MovieID      3946      3947      3948      3949      3950      3951      3952  \n",
       "0       -0.022024 -0.085415  0.403529  0.105579  0.031912  0.050450  0.088910  \n",
       "1        0.062576 -0.016248  0.155790 -0.418737 -0.101102 -0.054098 -0.140188  \n",
       "2        0.029349  0.020866  0.121532  0.076205  0.012345  0.015148 -0.109956  \n",
       "3       -0.003417 -0.083982  0.094512  0.057557 -0.026050  0.014841 -0.034224  \n",
       "4       -0.015940 -0.050080 -0.052539  0.507189  0.033830  0.125706  0.199244  \n",
       "\n",
       "[5 rows x 3706 columns]"
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preds_df = pd.DataFrame(all_user_predicted_ratings, columns = R_df.columns)\n",
    "preds_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):\n",
    "    \n",
    "    user_row_number = userID - 1 \n",
    "    sorted_user_predictions = preds_df.iloc[user_row_number].sort_values(ascending=False) \n",
    "    \n",
    "    user_data = original_ratings_df[original_ratings_df.UserID == (userID)]\n",
    "    user_full = (user_data.merge(movies_df, how = 'left', left_on = 'MovieID', right_on = 'MovieID').\n",
    "                     sort_values(['Rating'], ascending=False)\n",
    "                 )\n",
    "\n",
    "    print 'User {0} has already rated {1} movies.'.format(userID, user_full.shape[0])\n",
    "    print 'Recommending highest {0} predicted ratings movies not already rated.'.format(num_recommendations)\n",
    "    \n",
    "    recommendations = (movies_df[~movies_df['MovieID'].isin(user_full['MovieID'])].\n",
    "         merge(pd.DataFrame(sorted_user_predictions).reset_index(), how = 'left',\n",
    "               left_on = 'MovieID',\n",
    "               right_on = 'MovieID').\n",
    "         rename(columns = {user_row_number: 'Predictions'}).\n",
    "         sort_values('Predictions', ascending = False).\n",
    "                       iloc[:num_recommendations, :-1]\n",
    "                      )\n",
    "\n",
    "    return user_full, recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 256,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "User 837 has already rated 69 movies.\n",
      "Recommending highest 10 predicted ratings movies not already rated.\n"
     ]
    }
   ],
   "source": [
    "already_rated, predictions = recommend_movies(preds_df, 837, movies_df, ratings_df, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 257,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Title</th>\n",
       "      <th>Genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>837</td>\n",
       "      <td>858</td>\n",
       "      <td>5</td>\n",
       "      <td>975360036</td>\n",
       "      <td>Godfather, The (1972)</td>\n",
       "      <td>Action|Crime|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>837</td>\n",
       "      <td>1387</td>\n",
       "      <td>5</td>\n",
       "      <td>975360036</td>\n",
       "      <td>Jaws (1975)</td>\n",
       "      <td>Action|Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>837</td>\n",
       "      <td>2028</td>\n",
       "      <td>5</td>\n",
       "      <td>975360089</td>\n",
       "      <td>Saving Private Ryan (1998)</td>\n",
       "      <td>Action|Drama|War</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>837</td>\n",
       "      <td>1221</td>\n",
       "      <td>5</td>\n",
       "      <td>975360036</td>\n",
       "      <td>Godfather: Part II, The (1974)</td>\n",
       "      <td>Action|Crime|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>837</td>\n",
       "      <td>913</td>\n",
       "      <td>5</td>\n",
       "      <td>975359921</td>\n",
       "      <td>Maltese Falcon, The (1941)</td>\n",
       "      <td>Film-Noir|Mystery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>837</td>\n",
       "      <td>3417</td>\n",
       "      <td>5</td>\n",
       "      <td>975360893</td>\n",
       "      <td>Crimson Pirate, The (1952)</td>\n",
       "      <td>Adventure|Comedy|Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>837</td>\n",
       "      <td>2186</td>\n",
       "      <td>4</td>\n",
       "      <td>975359955</td>\n",
       "      <td>Strangers on a Train (1951)</td>\n",
       "      <td>Film-Noir|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>837</td>\n",
       "      <td>2791</td>\n",
       "      <td>4</td>\n",
       "      <td>975360893</td>\n",
       "      <td>Airplane! (1980)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>837</td>\n",
       "      <td>1188</td>\n",
       "      <td>4</td>\n",
       "      <td>975360920</td>\n",
       "      <td>Strictly Ballroom (1992)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>837</td>\n",
       "      <td>1304</td>\n",
       "      <td>4</td>\n",
       "      <td>975360058</td>\n",
       "      <td>Butch Cassidy and the Sundance Kid (1969)</td>\n",
       "      <td>Action|Comedy|Western</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    UserID  MovieID  Rating  Timestamp  \\\n",
       "36     837      858       5  975360036   \n",
       "35     837     1387       5  975360036   \n",
       "65     837     2028       5  975360089   \n",
       "63     837     1221       5  975360036   \n",
       "11     837      913       5  975359921   \n",
       "20     837     3417       5  975360893   \n",
       "34     837     2186       4  975359955   \n",
       "55     837     2791       4  975360893   \n",
       "31     837     1188       4  975360920   \n",
       "28     837     1304       4  975360058   \n",
       "\n",
       "                                        Title                   Genres  \n",
       "36                      Godfather, The (1972)       Action|Crime|Drama  \n",
       "35                                Jaws (1975)            Action|Horror  \n",
       "65                 Saving Private Ryan (1998)         Action|Drama|War  \n",
       "63             Godfather: Part II, The (1974)       Action|Crime|Drama  \n",
       "11                 Maltese Falcon, The (1941)        Film-Noir|Mystery  \n",
       "20                 Crimson Pirate, The (1952)  Adventure|Comedy|Sci-Fi  \n",
       "34                Strangers on a Train (1951)       Film-Noir|Thriller  \n",
       "55                           Airplane! (1980)                   Comedy  \n",
       "31                   Strictly Ballroom (1992)           Comedy|Romance  \n",
       "28  Butch Cassidy and the Sundance Kid (1969)    Action|Comedy|Western  "
      ]
     },
     "execution_count": 257,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "already_rated.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 258,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Title</th>\n",
       "      <th>Genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>516</th>\n",
       "      <td>527</td>\n",
       "      <td>Schindler's List (1993)</td>\n",
       "      <td>Drama|War</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1848</th>\n",
       "      <td>1953</td>\n",
       "      <td>French Connection, The (1971)</td>\n",
       "      <td>Action|Crime|Drama|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>608</td>\n",
       "      <td>Fargo (1996)</td>\n",
       "      <td>Crime|Drama|Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1235</th>\n",
       "      <td>1284</td>\n",
       "      <td>Big Sleep, The (1946)</td>\n",
       "      <td>Film-Noir|Mystery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2085</th>\n",
       "      <td>2194</td>\n",
       "      <td>Untouchables, The (1987)</td>\n",
       "      <td>Action|Crime|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1188</th>\n",
       "      <td>1230</td>\n",
       "      <td>Annie Hall (1977)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1198</th>\n",
       "      <td>1242</td>\n",
       "      <td>Glory (1989)</td>\n",
       "      <td>Action|Drama|War</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>897</th>\n",
       "      <td>922</td>\n",
       "      <td>Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)</td>\n",
       "      <td>Film-Noir</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1849</th>\n",
       "      <td>1954</td>\n",
       "      <td>Rocky (1976)</td>\n",
       "      <td>Action|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>581</th>\n",
       "      <td>593</td>\n",
       "      <td>Silence of the Lambs, The (1991)</td>\n",
       "      <td>Drama|Thriller</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      MovieID                                          Title  \\\n",
       "516       527                        Schindler's List (1993)   \n",
       "1848     1953                  French Connection, The (1971)   \n",
       "596       608                                   Fargo (1996)   \n",
       "1235     1284                          Big Sleep, The (1946)   \n",
       "2085     2194                       Untouchables, The (1987)   \n",
       "1188     1230                              Annie Hall (1977)   \n",
       "1198     1242                                   Glory (1989)   \n",
       "897       922  Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)   \n",
       "1849     1954                                   Rocky (1976)   \n",
       "581       593               Silence of the Lambs, The (1991)   \n",
       "\n",
       "                           Genres  \n",
       "516                     Drama|War  \n",
       "1848  Action|Crime|Drama|Thriller  \n",
       "596          Crime|Drama|Thriller  \n",
       "1235            Film-Noir|Mystery  \n",
       "2085           Action|Crime|Drama  \n",
       "1188               Comedy|Romance  \n",
       "1198             Action|Drama|War  \n",
       "897                     Film-Noir  \n",
       "1849                 Action|Drama  \n",
       "581                Drama|Thriller  "
      ]
     },
     "execution_count": 258,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
