{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "class MF():\n",
    "\n",
    "    def __init__(self, R, K, alpha, beta, iterations):\n",
    "\n",
    "        self.R = R #user-item матрица\n",
    "        self.num_users, self.num_items = R.shape \n",
    "        self.K = K #латентные факторы\n",
    "        self.alpha = alpha #learning rate\n",
    "        self.beta = beta #регуляризация\n",
    "        self.iterations = iterations\n",
    "\n",
    "    def train(self):\n",
    "        #Инициализируем латентные параметры\n",
    "        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))\n",
    "        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))\n",
    "\n",
    "        # Инициализируем bias\n",
    "        self.b_u = np.zeros(self.num_users)\n",
    "        self.b_i = np.zeros(self.num_items)\n",
    "        self.b = np.mean(self.R[np.where(self.R != 0)])\n",
    "\n",
    "        #Создаем лист для тренировочных записей\n",
    "        self.samples = [\n",
    "            (i, j, self.R[i, j])\n",
    "            for i in range(self.num_users)\n",
    "            for j in range(self.num_items)\n",
    "            if self.R[i, j] > 0\n",
    "        ]\n",
    "\n",
    "        #Выполняем SGD для заданного количества итераций\n",
    "        training_process = []\n",
    "        for i in range(self.iterations):\n",
    "            np.random.shuffle(self.samples)\n",
    "            self.sgd()\n",
    "            mse = self.mse()\n",
    "            training_process.append((i, mse))\n",
    "            if (i+1) % 10 == 0:\n",
    "                print(\"Iteration: %d ; error = %.4f\" % (i+1, mse))\n",
    "\n",
    "        return training_process\n",
    "\n",
    "    def mse(self):\n",
    "        xs, ys = self.R.nonzero()\n",
    "        predicted = self.full_matrix()\n",
    "        error = 0\n",
    "        for x, y in zip(xs, ys):\n",
    "            error += pow(self.R[x, y] - predicted[x, y], 2)\n",
    "        return np.sqrt(error)\n",
    "\n",
    "    def sgd(self):\n",
    "        for i, j, r in self.samples:\n",
    "            #Prediction, error\n",
    "            prediction = self.get_rating(i, j)\n",
    "            e = (r - prediction)\n",
    "\n",
    "            #Bias\n",
    "            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])\n",
    "            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])\n",
    "\n",
    "            #Обновляем матрицы\n",
    "            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])\n",
    "            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])\n",
    "\n",
    "    def get_rating(self, i, j):\n",
    "        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)\n",
    "        return prediction\n",
    "\n",
    "    def full_matrix(self):\n",
    "        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 10 ; error = 0.1765\n",
      "Iteration: 20 ; error = 0.0441\n",
      "Iteration: 30 ; error = 0.0355\n"
     ]
    }
   ],
   "source": [
    "R = np.array([\n",
    "    [5, 3, 0, 1],\n",
    "    [4, 0, 0, 1],\n",
    "    [1, 1, 0, 5],\n",
    "    [1, 0, 0, 4],\n",
    "    [0, 1, 5, 4],\n",
    "])\n",
    "\n",
    "mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=30)\n",
    "training_process = mf.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 4.98516926,  3.00305744,  3.69268725,  1.00797439],\n",
       "       [ 3.99669096,  2.67552074,  2.76676618,  1.00941029],\n",
       "       [ 1.00653448,  1.00678123,  3.78098212,  4.98343844],\n",
       "       [ 1.0098207 ,  0.91971876,  3.10772854,  3.99301525],\n",
       "       [ 2.54073976,  1.01389619,  4.9878478 ,  3.99556136]])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mf.full_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
