{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4uF_KywH0yV9"
      },
      "source": [
        "#Ноутбук для решения задач урока 4.2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Yk-K_mHMs7ns",
        "outputId": "9aa76769-e3c3-4185-9c44-914db93c3648"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m867.6/867.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m314.1/314.1 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.9/302.9 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.6/120.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h"
          ]
        }
      ],
      "source": [
        "!pip install langchain langchain-community huggingface_hub langchain-openai openai faiss-gpu langchainhub -q"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CoofhZcDtp6R"
      },
      "source": [
        "## Если используете ключ от OpenAI, запустите эту ячейку 👇\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "k48wWXnPtKa2"
      },
      "outputs": [],
      "source": [
        "from langchain_openai import ChatOpenAI\n",
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "\n",
        "# os.environ['OPENAI_API_KEY'] = \"Введите ваш OpenAI API ключ\"\n",
        "os.environ['OPENAI_API_KEY'] = getpass(prompt='Введите ваш OpenAI API ключ')\n",
        "\n",
        "# Инициализируем языковую модель\n",
        "llm = ChatOpenAI(temperature=0.0)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DiQqMwQJt8rP"
      },
      "source": [
        "## Если используете ключ из курса, запустите эти ячейки 👇\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Keq0WAjLuPgJ",
        "outputId": "c1feee73-f78e-4705-ff2b-b30ef9b84233"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--2024-05-06 19:22:38--  https://raw.githubusercontent.com/a-milenkin/LLM_practical_course/main/notebooks/utils.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 11184 (11K) [text/plain]\n",
            "Saving to: ‘utils.py’\n",
            "\n",
            "\rutils.py              0%[                    ]       0  --.-KB/s               \rutils.py            100%[===================>]  10.92K  --.-KB/s    in 0s      \n",
            "\n",
            "2024-05-06 19:22:39 (38.7 MB/s) - ‘utils.py’ saved [11184/11184]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "!wget https://raw.githubusercontent.com/a-milenkin/LLM_practical_course/main/notebooks/utils.py"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "PCS_WedHtPRg",
        "outputId": "09c06280-3dbb-491e-9dcb-251474b6a6f2"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Введите API ключ··········\n"
          ]
        }
      ],
      "source": [
        "from utils import ChatOpenAI\n",
        "from getpass import getpass\n",
        "\n",
        "#course_api_key= \"Введите ваш API ключ с курса\"\n",
        "course_api_key = getpass(prompt='Введите API ключ')\n",
        "\n",
        "# Инициализируем языковую модель\n",
        "llm = ChatOpenAI(temperature=0.0, course_api_key=course_api_key)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Z9uleJTbu6Ui"
      },
      "source": [
        "## Задание из стэпа 6. 🪛 Openai tools Agent + RAG 🦞"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6F85KuvzBbFr"
      },
      "source": [
        "Распарсив веб страницу, создадим базу знаний"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "92DVpeI4lCW3"
      },
      "outputs": [],
      "source": [
        "from langchain.document_loaders import WebBaseLoader"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Rm6355L_P3Wf"
      },
      "outputs": [],
      "source": [
        "# ссылка на страницу из Википедии про Ганнибала\n",
        "url = \"https://ru.wikipedia.org/wiki/%D0%93%D0%B0%D0%BD%D0%BD%D0%B8%D0%B1%D0%B0%D0%BB,_%D0%90%D0%B1%D1%80%D0%B0%D0%BC_%D0%9F%D0%B5%D1%82%D1%80%D0%BE%D0%B2%D0%B8%D1%87\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "C7gtNBhuS5iW"
      },
      "outputs": [],
      "source": [
        "# Определите лоадер\n",
        "loader = ###\n",
        "data = loader.load()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LZeBiibpBh0D"
      },
      "source": [
        "Засплитим данные, сформируем эмбединги и создадим ретривер\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jWVZRh84S5cr",
        "outputId": "8dd72345-6a9f-4395-8929-71d9428444ce"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1007, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 3163, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1083, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1103, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1712, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1594, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1005, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1143, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 1042, which is longer than the specified 1000\n",
            "WARNING:langchain_text_splitters.base:Created a chunk of size 2589, which is longer than the specified 1000\n"
          ]
        }
      ],
      "source": [
        "from langchain_community.vectorstores import FAISS\n",
        "from langchain_text_splitters import CharacterTextSplitter\n",
        "from utils import OpenAIEmbeddings\n",
        "\n",
        "text_splitter = ###\n",
        "\n",
        "embeddings = OpenAIEmbeddings(course_api_key=course_api_key)\n",
        "\n",
        "db_embed = FAISS.from_documents()\n",
        "\n",
        "retriever = ###"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bD7rA6g9BkgZ"
      },
      "source": [
        "Инструменты для агента"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KB2nJbhrS5aM"
      },
      "outputs": [],
      "source": [
        "from langchain.tools.retriever import create_retriever_tool\n",
        "\n",
        "tool = create_retriever_tool(\n",
        "    # Ваш код\n",
        ")\n",
        "tools = [tool]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "E3sQK4z_S5Xm",
        "outputId": "8082b959-ae10-49ef-e310-d85f94828af7"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-tools-agent', 'lc_hub_commit_hash': 'c18672812789a3b9697656dd539edf0120285dcae36396d0b548ae42a4ed66f5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(variable_name='agent_scratchpad')])"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "from langchain import hub\n",
        "\n",
        "prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n",
        "prompt"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XyKK6v0HBo2Q"
      },
      "source": [
        "Подключаем агента"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "nNVoBzwIS5VA"
      },
      "outputs": [],
      "source": [
        "from langchain.agents import create_openai_tools_agent\n",
        "from langchain.agents.agent import AgentExecutor\n",
        "\n",
        "agent = # Ваш код\n",
        "agent_executor = AgentExecutor(# Ваш код)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wqKxDBhRdzr-"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "from tqdm import tqdm"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "a7VFhJoNepCy"
      },
      "outputs": [],
      "source": [
        "# Загружаем датасет с вопросами про Ганнибала\n",
        "df = pd.read_csv(\"https://stepik.org/media/attachments/lesson/1107866/gannibal.csv\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "z5b0Ii9kvBx2",
        "outputId": "7ed288a3-ca17-4198-dceb-2cb639942b86"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "  0%|          | 0/10 [00:03<?, ?it/s]\n"
          ]
        }
      ],
      "source": [
        "answers = [] # Список, где будем хранить ответы модели\n",
        "\n",
        "for text_input in tqdm(df['question']):\n",
        "    answer = ### Ваш код. Получим ответ от агента\n",
        "    answers.append(answer['output']) # Добавляем ответ в список\n",
        "    break # Для отладки. Уберите, когда убедитесь, что на одном примере работает"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ks63JYu1vBpt"
      },
      "outputs": [],
      "source": [
        "df['answer'] = answers # Создаём новый столбец из ответов модели"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FvJ8XyUQwrUK"
      },
      "outputs": [],
      "source": [
        "df.to_csv('step6_solution.csv', index=False) # Сохраняем файл, отправляем на Stepik, получаем баллы :)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "okESYbnsXoJM"
      },
      "source": [
        "## Задание из стэпа 9 📊Задача на SQL-агента.🕵️‍♂️\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gtDOHnboXjQ1",
        "outputId": "2402cfee-610f-4978-9d69-ccdf9f28da18"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            " * Starting PostgreSQL 14 database server\n",
            "   ...done.\n",
            "ALTER ROLE\n"
          ]
        }
      ],
      "source": [
        "# Установим postgres сервер\n",
        "!sudo apt-get -y -qq update\n",
        "!sudo apt-get -y -qq install postgresql\n",
        "!sudo service postgresql start\n",
        "\n",
        "# установим пароль `postgres` для пользователя `postgres`\n",
        "!sudo -u postgres psql -U postgres -c \"ALTER USER postgres PASSWORD 'postgres';\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CMzQ-qUD75-m"
      },
      "source": [
        "Создаем базу, пользователя и предоставляем нужные права.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rYX41roSXjN-",
        "outputId": "bf56e532-cc1b-4240-cb86-6634238c900c"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "CREATE DATABASE\n",
            "CREATE ROLE\n",
            "GRANT ROLE\n",
            "GRANT\n"
          ]
        }
      ],
      "source": [
        "!sudo -u postgres psql -U postgres -c \"CREATE DATABASE dvdrental;\" # CREATE DATABASE\n",
        "!sudo -u postgres psql -U postgres -c \"create user root with encrypted password 'mypass';\" # CREATE ROLE\n",
        "!sudo -u postgres psql -U postgres -c \"GRANT postgres TO root;\" # GRANT ROLE\n",
        "!sudo -u postgres psql -U postgres -c \"grant all privileges on database dvdrental to root;\" # GRANT"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BQoWz9ZJXjLZ",
        "outputId": "0af5d4a3-11d1-4484-89a7-0d56ce25b069"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
            "\r  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0\r  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0\r100  537k  100  537k    0     0  2936k      0 --:--:-- --:--:-- --:--:-- 2923k\n",
            "Archive:  /content/dvdrental.zip\n",
            "  inflating: dvdrental.tar           \n"
          ]
        }
      ],
      "source": [
        "# создаем базу из резервной копии\n",
        "!wget https://stepik.org/media/attachments/lesson/1107866/dvdrental.tar\n",
        "!sudo pg_restore -U root -d dvdrental /content/dvdrental.tar"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "eZdef16iXjIh",
        "outputId": "fe5d8a28-4a68-4588-ff20-aab2b197b163"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "postgresql\n",
            "['actor', 'address', 'category', 'city', 'country', 'customer', 'film', 'film_actor', 'film_category', 'inventory', 'language', 'payment', 'rental', 'staff', 'store']\n"
          ]
        }
      ],
      "source": [
        "from langchain_community.utilities import SQLDatabase\n",
        "\n",
        "# подключимся к базе\n",
        "db = SQLDatabase.from_uri(\"postgresql+psycopg2://root:mypass@localhost:5432/dvdrental\")\n",
        "print(db.dialect)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6hbw9CbFCVCZ"
      },
      "source": [
        "Создаем агента и вручаем ему инструменты"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1LBYMk_hakoE"
      },
      "outputs": [],
      "source": [
        "from langchain_community.agent_toolkits import create_sql_agent\n",
        "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n",
        "\n",
        "toolkit = # Ваш код\n",
        "\n",
        "agent_executor = # Ваш код"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "weiMzxz5aklR"
      },
      "outputs": [],
      "source": [
        "# Загружаем датасет с вопросами к базе\n",
        "df = pd.read_csv(\"https://stepik.org/media/attachments/lesson/1107866/rental_dvd.csv\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "uWDnC-Uiakez",
        "outputId": "6a7be5a5-58c4-41e6-9bf3-207f46492ea6"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "  0%|          | 0/5 [00:07<?, ?it/s]\n"
          ]
        }
      ],
      "source": [
        "answers = [] # Список, где будем хранить ответы модели\n",
        "\n",
        "for text_input in tqdm(df['question']):\n",
        "    answer = # Ваш код. Получим ответ от агента\n",
        "    answers.append(answer['output']) # Добавляем ответ в список\n",
        "    break # Для отладки. Уберите, когда убедитесь, что на одном примере работает"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "xHDAt0_3wrRi"
      },
      "outputs": [],
      "source": [
        "df['answer'] = answers # Создаём новый столбец из ответов модели"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "xWZe_a59dG3K"
      },
      "outputs": [],
      "source": [
        "df.to_csv('step_9_solution.csv', index=False) # Сохраняем файл, отправляем на Stepik, получаем баллы :)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [
        "CoofhZcDtp6R"
      ],
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
