{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "yWX4CHjTEb6a"
      },
      "outputs": [],
      "source": [
        "# Install dependencies quietly; %pip (not !pip) guarantees the install\n",
        "# goes into the environment of the running kernel\n",
        "%pip install openai langchain tiktoken langchain-openai langchain-community -q"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "l2Nn2MklEgyC"
      },
      "source": [
        "## Если используете ключ от OpenAI, запустите эту ячейку 👇"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eL3AQn88EgGa"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "from getpass import getpass\n",
        "from langchain_openai import ChatOpenAI\n",
        "\n",
        "\n",
        "# Alternative: hardcode the key (avoid — the key would be saved in the notebook)\n",
        "# os.environ['OPENAI_API_KEY'] = \"<your OpenAI API key>\"\n",
        "# Read the key interactively so it never appears in the notebook source or output\n",
        "os.environ['OPENAI_API_KEY'] = getpass(prompt='Введите ваш OpenAI API ключ')\n",
        "\n",
        "# Initialize the chat model; temperature=0.0 makes the answers deterministic\n",
        "llm = ChatOpenAI(temperature=0.0)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hUn1o9WjEkwQ"
      },
      "source": [
        "## Если используете ключ из курса, запустите эти ячейки 👇"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "XVLxyu1OEkAZ"
      },
      "outputs": [],
      "source": [
        "# Download the course helper module (provides a ChatOpenAI wrapper used in the next cell)\n",
        "!wget https://raw.githubusercontent.com/a-milenkin/LLM_practical_course/main/notebooks/utils.py"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "h23GG1C8EtZs"
      },
      "outputs": [],
      "source": [
        "from utils import ChatOpenAI\n",
        "from getpass import getpass\n",
        "\n",
        "# Alternative: hardcode the course key (avoid — the key would be saved in the notebook)\n",
        "# course_api_key = \"<your course API key>\"\n",
        "course_api_key = getpass(prompt='Введите API ключ')\n",
        "\n",
        "# Initialize the language model via the course-provided wrapper\n",
        "llm = ChatOpenAI(course_api_key=course_api_key)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9xXARAI2mUfp"
      },
      "source": [
        "# Задание 2.2.8: ⚔️ Spam 🆚 Crypto"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "MSmKJBbUEvEt"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "# Root-level `from langchain import PromptTemplate` is deprecated;\n",
        "# import from langchain.prompts, consistent with ChatPromptTemplate below\n",
        "from langchain.prompts import PromptTemplate\n",
        "from tqdm import tqdm"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ctMVarXbFB8B"
      },
      "outputs": [],
      "source": [
        "# Load 100 crypto-related messages (task: label each as spam or not)\n",
        "df = pd.read_csv('https://stepik.org/media/attachments/lesson/1110806/100_crypto_messages.csv')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "2y8GmlLdJCrA"
      },
      "outputs": [],
      "source": [
        "# Write the prompt: fill in the '...' with an instruction, a question and an output indicator\n",
        "template = \"\"\"...\n",
        "\n",
        "Context: {text_input}\n",
        "\n",
        "Question: ...\n",
        "\n",
        "Answer: ...\n",
        "\"\"\"\n",
        "\n",
        "# Template with a single input variable, formatted per message in the loop below\n",
        "prompt_template = PromptTemplate(\n",
        "    input_variables=[\"text_input\"],\n",
        "    template=template\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "r0uNwaE8hznq"
      },
      "outputs": [],
      "source": [
        "spam_labels = [] # Model answers are collected here, one per message\n",
        "\n",
        "for text_input in tqdm(df['text']):\n",
        "    prompt = prompt_template.format(text_input=text_input) # Insert the message into the prompt\n",
        "    spam_label = llm.invoke(prompt).content # Query the model and keep the text of its reply\n",
        "    spam_labels.append(spam_label) # Add the answer to the list\n",
        "    break # Debug only. Remove once you are sure a single example works"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "6XoGU6Zkm1RE"
      },
      "outputs": [],
      "source": [
        "# NOTE: this raises a length-mismatch error while the debug `break` in the loop above remains\n",
        "df['is_spam'] = spam_labels # Create a new column from the model answers"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "gJLpYmf5njxp"
      },
      "outputs": [],
      "source": [
        "# Save the result, submit the file on Stepik, collect the points :)\n",
        "df.to_csv('2_2_7_solution.csv', index=False)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9N0jiam-oDNt"
      },
      "source": [
        "# Задание 2.2.9: 💼 ChatGPT - ваш ручной карьерный консультант"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "I2oOaHvDBOTG"
      },
      "source": [
        "В данной задаче очень удобно использовать `OutputParser` (подробнее о нем в [ноутбуке](https://github.com/a-milenkin/LLM_practical_course/blob/main/notebooks/M2.2_LangChain_Prompting.ipynb))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "yHOU7Fvm1KDs"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "# tqdm is used by the extraction loop below; import it here so this\n",
        "# section runs standalone (previously it relied on the 2.2.8 cells)\n",
        "from tqdm import tqdm\n",
        "from langchain.output_parsers import ResponseSchema, StructuredOutputParser\n",
        "from langchain.prompts import ChatPromptTemplate"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "jrfTlE2Sr_E7"
      },
      "outputs": [],
      "source": [
        "# Load 50 job-vacancy messages to extract structured fields from\n",
        "df = pd.read_csv('https://stepik.org/media/attachments/lesson/1110806/vacancies_messages_50.csv')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "u14oe5fGsJdM"
      },
      "outputs": [],
      "source": [
        "# Prompt asking the model to extract five fields from each vacancy text;\n",
        "# {format_instructions} is filled with the parser's formatting rules (next cells)\n",
        "template = \"\"\"Из следующего текста извлеки информацию:\n",
        "\n",
        "job_title: ...\n",
        "\n",
        "company: ...\n",
        "\n",
        "salary: ...\n",
        "\n",
        "tg: ...\n",
        "\n",
        "grade: ...\n",
        "\n",
        "text: {text}\n",
        "\n",
        "{format_instructions}\n",
        "\"\"\"\n",
        "\n",
        "# Build a chat prompt; input variables are inferred from the template placeholders\n",
        "prompt = ChatPromptTemplate.from_template(template=template)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "N90KnUeFs-Kn"
      },
      "outputs": [],
      "source": [
        "# One ResponseSchema per field the model must extract;\n",
        "# fill in each description in place of the '...'\n",
        "job_title_schema = ResponseSchema(name=\"job_title\", description=\"...\")\n",
        "company_schema = ResponseSchema(name=\"company\", description=\"...\")\n",
        "salary_schema = ResponseSchema(name=\"salary\", description=\"...\")\n",
        "tg_schema = ResponseSchema(name=\"tg\", description=\"...\")\n",
        "grade_schema = ResponseSchema(name=\"grade\", description=\"...\")\n",
        "\n",
        "# The full list is handed to StructuredOutputParser in the next cell\n",
        "response_schemas = [\n",
        "    job_title_schema,\n",
        "    company_schema,\n",
        "    salary_schema,\n",
        "    tg_schema,\n",
        "    grade_schema\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_rIWSsUruZM8"
      },
      "outputs": [],
      "source": [
        "output_parser = StructuredOutputParser.from_response_schemas(response_schemas) # Build the parser from the schema list\n",
        "format_instructions = output_parser.get_format_instructions() # Formatting rules injected into the prompt"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "5BjhrkTdsBBE"
      },
      "outputs": [],
      "source": [
        "dict_list = [] # List of dicts collecting the extracted fields, one dict per vacancy\n",
        "for text_input in tqdm(df['text']):\n",
        "    messages = prompt.format_messages(\n",
        "        text=text_input,\n",
        "        format_instructions=format_instructions\n",
        "    )\n",
        "    response = llm.invoke(messages) # Model answer\n",
        "    dict_list.append(output_parser.parse(response.content)) # Parse the answer into a dict and store it\n",
        "    break # Debug only. Remove once you are sure a single example works"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YdkJvAxN5ghP"
      },
      "outputs": [],
      "source": [
        "# NOTE: with the debug `break` above still in place, only one row is parsed and the output is incomplete\n",
        "result_df = pd.DataFrame(dict_list) # DataFrame built from the extracted field dicts\n",
        "ans_df = pd.concat([df, result_df], axis=1) # Join the original texts with the extracted columns\n",
        "ans_df.to_csv('2_2_8_solution.csv', index=False) # Save the file, submit on Stepik, collect the points :)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
