{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gEG2cGnXfVBM",
        "outputId": "2116dedf-27c6-41c8-9452-52725e4d78c8"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m810.5/810.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m262.4/262.4 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m269.1/269.1 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.6/71.6 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.8/77.8 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.5/138.5 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h"
          ]
        }
      ],
      "source": [
        "!pip install langchain openai -q"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ix68Au3nfW94"
      },
      "outputs": [],
      "source": [
        "import re\n",
        "import pandas as pd\n",
        "from tqdm import tqdm\n",
        "from getpass import getpass\n",
        "\n",
        "from langchain.prompts import PromptTemplate\n",
        "from langchain.chains import LLMChain, TransformChain\n",
        "from langchain.output_parsers import ResponseSchema, StructuredOutputParser"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "KfBKAJ0vfiCh"
      },
      "source": [
        "## Если используете ключ из курса, запустите эти ячейки 👇\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mOvzGTXyfgAw",
        "outputId": "9043ea14-a63e-438f-aa48-771349f79547"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--2024-03-21 16:40:29--  https://raw.githubusercontent.com/a-milenkin/LLM_practical_course/main/notebooks/utils.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 10822 (11K) [text/plain]\n",
            "Saving to: ‘utils.py’\n",
            "\n",
            "utils.py            100%[===================>]  10.57K  --.-KB/s    in 0s      \n",
            "\n",
            "2024-03-21 16:40:30 (36.9 MB/s) - ‘utils.py’ saved [10822/10822]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "!wget https://raw.githubusercontent.com/a-milenkin/LLM_practical_course/main/notebooks/utils.py"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_baYTEnSfex7",
        "outputId": "d2039f91-608e-4c6a-e3c6-69491f56be81"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Введите ваш API ключ, полученный в боте курса··········\n"
          ]
        }
      ],
      "source": [
        "from utils import ChatOpenAI\n",
        "from getpass import getpass\n",
        "\n",
        "#course_api_key= \"Введите ваш API ключ, полученный в боте курса\"\n",
        "course_api_key = getpass(prompt='Введите ваш API ключ, полученный в боте курса')\n",
        "\n",
        "# инициализируем языковую модель\n",
        "llm = ChatOpenAI(temperature=0.0, course_api_key=course_api_key)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "z2JH453Ufm4L"
      },
      "source": [
        "## Задание 3.2.9 🤔 Кажется, это что-то на LLM-ском? 🧐"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "HExe1XOeflIq",
        "outputId": "6b24db73-092a-414e-a444-e78ded57dabd"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 13,\n  \"fields\": [\n    {\n      \"column\": \"raw_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 13,\n        \"samples\": [\n          \"\\u0412 \\u0433\\u043e\\u0440\\u043e\\u0434\\u0435 \\u0431\\u044b\\u043b\\u043e \\u043c\\u043d\\u043e\\u0436\\u0435\\u0441\\u0442\\u0432\\u043e \\u043b\\u044e\\u00a3\\u0434\\u0435\\u0439, \\u043a\\u0430\\u0436\\u0434\\u044b\\u0439 \\u0438\\u0437 \\u043a\\u043e\\u00bf\\u0442\\u043e\\u0440\\u044b\\u0445 \\u0431\\u044b\\u043b \\u0437\\u0430\\u043d\\u044f\\u0442 \\u0441\\u0432\\u043e\\u0438\\u043c\\u0438 \\u0434\\u0435\\u043b\\u0430\\u043c\\u0438. \\u0421\\u0440\\u00bf\\u0435\\u0434\\u0438 \\u044d\\u0442\\u043e\\u0439 \\u0441\\u0443\\u0435\\u0442\\u044b, \\u0418\\u0432\\u0430\\u00a3\\u043d, \\u043c\\u043e\\u043b\\u043e\\u0434\\u043e\\u0439 \\u0447\\u0435\\u043b\\u043e\\u0432\\u0435\\u043a \\u0441 \\u0431\\u043e\\u043b\\u044c\\u0448\\u0438\\u043c\\u0438 \\u043c\\u0435\\u0447\\u0442\\u0430\\u043c\\u0438, \\u043f\\u044b\\u0442\\u0430\\u043b\\u0441\\u044f \\u043d\\u0430\\u0439\\u0442\\u0438 \\u0441\\u0432\\u043e\\u0435 \\u043c\\u0435\\u0441\\u0442\\u043e. \\u0415\\u0433\\u043e \\u0434\\u0440\\u0443\\u0437\\u044c\\u044f, \\u0421\\u0435\\u0440\\u0433\\u0435\\u0439 \\u0438 \\u041c\\u0430\\u0440\\u00a1\\u0438\\u044f, \\u0432\\u0441\\u0435\\u0433\\u0434\\u0430 \\u0431\\u044b\\u043b\\u0438 \\u0440\\u044f\\u0434\\u043e\\u043c, \\u043f\\u043e\\u0434\\u0434\\u0435\\u0440\\u0436\\u0438\\u0432\\u0430\\u044f \\u0435\\u0433\\u043e \\u0432\\u043e \\u0432\\u0441\\u0435\\u0445 \\u043d\\u0430\\u0447\\u0438\\u043d\\u0430\\u043d\\u0438\\u044f\\u0445. \\u041e\\u043d\\u0438 \\u0432\\u0435\\u0440\\u0438\\u043b\\u0438 \\u0432 \\u043d\\u0435\\u0433\\u043e, \\u043a\\u043e\\u0433\\u0434\\u0430 \\u043d\\u0438\\u043a\\u0442\\u043e \\u0434\\u0440\\u0443\\u0433\\u043e\\u0439 \\u043d\\u0435 \\u0432\\u0435\\u0440\\u0438\\u043b. 
\\u041d\\u043e \\u0436\\u0438\\u0437\\u043d\\u044c \\u0432 \\u0433\\u043e\\u0440\\u043e\\u0434\\u0435 \\u0431\\u044b\\u043b\\u0430 \\u043d\\u0435\\u043f\\u0440\\u043e\\u0441\\u0442\\u043e\\u0439. \\u041a\\u043e\\u043d\\u043a\\u0443\\u0440\\u0435\\u043d\\u0446\\u0438\\u044f \\u0431\\u044b\\u043b\\u0430 \\u043e\\u0436\\u0435\\u0441\\u0442\\u043e\\u0447\\u0435\\u043d\\u043d\\u043e\\u0439, \\u0430 \\u0431\\u043e\\u0440\\u044c\\u0431\\u0430 - \\u0440\\u0435\\u0430\\u043b\\u044c\\u043d\\u043e\\u0439. \\u041d\\u0435\\u0441\\u043c\\u043e\\u0442\\u0440\\u044f \\u043d\\u0430 \\u0432\\u0441\\u0435 \\u0442\\u0440\\u0443\\u0434\\u043d\\u043e\\u0441\\u0442\\u0438, \\u0418\\u0432\\u0430\\u043d \\u0431\\u044b\\u043b \\u0440\\u0435\\u0448\\u0435\\u043d \\u0441\\u0434\\u0435\\u043b\\u0430\\u0442\\u044c \\u0432\\u0441\\u0435 \\u0432\\u043e\\u0437\\u043c\\u043e\\u0436\\u043d\\u043e\\u0435, \\u0447\\u0442\\u043e\\u0431\\u00a3\\u044b \\u0434\\u043e\\u0441\\u0442\\u0438\\u0447\\u044c \\u0441\\u0432\\u043e\\u0438\\u0445 \\u0446\\u0435\\u043b\\u0435\\u0439. \\u041e\\u043d \\u0437\\u043d\\u0430\\u043b, \\u0447\\u0442\\u043e \\u0435\\u043c\\u0443 \\u043f\\u0440\\u0438\\u0434\\u0435\\u0442\\u0441\\u044f \\u043c\\u043d\\u043e\\u0433\\u043e \\u0440\\u0430\\u0431\\u043e\\u0442\\u0430\\u0442\\u044c \\u0438 \\u043d\\u0438\\u043a\\u043e\\u0433\\u0434\\u0430 \\u043d\\u0435 \\u0441\\u0434\\u0430\\u0432\\u0430\\u0442\\u044c\\u0441\\u044f. 
\\u0415\\u0433\\u043e \\u043f\\u0443\\u0442\\u044c \\u0431\\u044b\\u043b \\u043d\\u0435\\u043f\\u0440\\u043e\\u0441\\u0442, \\u043d\\u043e \\u0431\\u043b\\u0430\\u0433\\u043e\\u0434\\u0430\\u0440\\u044f \\u043f\\u043e\\u0434\\u0434\\u0435\\u0440\\u0436\\u043a\\u0435 \\u0434\\u0440\\u0443\\u0437\\u0435\\u0439 \\u0438 \\u043d\\u0435\\u043f\\u043e\\u043a\\u043e\\u043b\\u0435\\u0431\\u0438\\u043c\\u043e\\u0439 \\u0440\\u0435\\u0448\\u0438\\u043c\\u043e\\u0441\\u0442\\u0438, \\u043e\\u043d \\u0441\\u043c\\u043e\\u0433 \\u043f\\u0440\\u0435\\u043e\\u0434\\u043e\\u043b\\u0435\\u0442\\u044c \\u0432\\u0441\\u0435 \\u043f\\u0440\\u0435\\u043f\\u044f\\u0442\\u0441\\u0442\\u0432\\u0438\\u044f \\u0438 \\u0434\\u00a1\\u043e\\u0441\\u0442\\u0438\\u0447\\u044c \\u0441\\u0432\\u043e\\u0438\\u0445 \\u043c\\u0435\\u0447\\u0442.\",\n          \"En un peque\\u00f1o pueblo situado entre la\\u00a3s m\\u00bfonta\\u00f1as y el mar, viv\\u00eda una chica llamada Maria. Era conocida por su bondad y su amor por la naturaleza. Su vida estaba llena de personajes interesantes: el viejo pescador Juan, que le contaba historias del mar, el joven Tom\\u00e1s, que se un\\u00eda a ella en sus aventuras, y su mejor amiga Emilia, que siempre estaba all\\u00ed\\u00a1 para ella. Juntos, vivieron muchas aventuras, y cada d\\u00eda era un nuevo descubrimiento para ellos\\u00a3.\",\n          \"The sun was setting, casting long shadows over the small town. Jo\\u00bfhn, a middle-aged man with a heart full of dreams, was sitting on the porch of his old house. His friends, Peter and Mary, were there with him, sharing stories of their youth. They were all from the same tow\\u00a1n, and their lives were intertwined in ways they could never have imagined. Peter, a burly man with a booming voice, was the town's blacksmith. He was a man of few words, but his actions spoke volumes. Mary, on the other hand, was the town's schoolteacher. She was a woman of great wisdom and kindness, always ready to lend a helping hand. 
As they s\\u00a1at there, reminiscing about the past, a sense of nostalgia washed over them. They remembered the days when they were young and full of hope, when the world seemed full of possibilities. But now, as they looked at the setting sun, they realized that those days were long gone. John, however, was not ready to give up. He still had dreams, dreams that he was determined to fulfill. He knew that it would not be easy, but he was ready to face whatever challenges came his way. With Peter and Mary by his side, he knew that he could achieve anything.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-f0bd8586-1dc4-4568-b78d-1132ce646a2e\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>raw_text</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>The sun was setting, casting long shadows over...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Le soleil se couchait, jetant de longues ombre...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>El sol se estaba poniendo, proyectando largas ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>La ciudad estaba llena de vida, sus calles lle...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>La ville était pleine de vie, ses rues remplie...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f0bd8586-1dc4-4568-b78d-1132ce646a2e')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-f0bd8586-1dc4-4568-b78d-1132ce646a2e button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-f0bd8586-1dc4-4568-b78d-1132ce646a2e');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-a7aaac19-a1fe-44ad-b75e-e0b9621f95e4\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a7aaac19-a1fe-44ad-b75e-e0b9621f95e4')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-a7aaac19-a1fe-44ad-b75e-e0b9621f95e4 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                                            raw_text\n",
              "0  The sun was setting, casting long shadows over...\n",
              "1  Le soleil se couchait, jetant de longues ombre...\n",
              "2  El sol se estaba poniendo, proyectando largas ...\n",
              "3  La ciudad estaba llena de vida, sus calles lle...\n",
              "4  La ville était pleine de vie, ses rues remplie..."
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "df = pd.read_csv(\"https://stepik.org/media/attachments/lesson/1110883/raw_texts.csv\")\n",
        "df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "N9a5LdDiJwa2"
      },
      "outputs": [],
      "source": [
        "#df.to_csv('df.csv', index=False)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lECMhfEigA2x"
      },
      "source": [
        "Напишем функцию, которая очистит текст от ненужных символов: `¿, ¡, £`"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HsW9_cn6gy7-"
      },
      "source": [
        "\n",
        "Сохраним всё в итоговый файл. Убедитесь, что на этом этапе у вас в столбцах\n",
        "\n",
        "- `text` - очищенный текст (без символов ¿, ¡, £)\n",
        "- `language` - язык, на котором написан текст (название языка указать на английском языке)\n",
        "- `main_character` - имя главного персонажа в тексте (указать на том языке, на котором и написан сам текст)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "AcS14XikN5wc"
      },
      "outputs": [],
      "source": [
        "# Определение функции очистки текста\n",
        "def clean_text(inputs: dict) -> dict:\n",
        "    text = inputs[\"text\"]\n",
        "    cleaned_text = re.sub(r'[¿¡£]', '', text)\n",
        "    return {\"text\": cleaned_text}\n",
        "\n",
        "# Предположим, df - это ваш DataFrame с исходными данными, и он уже загружен\n",
        "# Пример: df = pd.read_csv('path_to_your_file.csv')\n",
        "\n",
        "# Применение функции очистки к каждому элементу в столбце 'raw_text'\n",
        "# Мы используем lambda-функцию для передачи каждого значения столбца в 'clean_text' в ожидаемом формате\n",
        "df['cleaned_text'] = df['raw_text'].apply(lambda x: clean_text({\"text\": x})[\"text\"])\n",
        "\n",
        "# Теперь в df есть столбец 'cleaned_text' с очищенными текстами\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 457
        },
        "id": "0tweuv_yKAqV",
        "outputId": "73309dc0-98ed-4277-b1b7-604e245201a7"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 13,\n  \"fields\": [\n    {\n      \"column\": \"raw_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 13,\n        \"samples\": [\n          \"\\u0412 \\u0433\\u043e\\u0440\\u043e\\u0434\\u0435 \\u0431\\u044b\\u043b\\u043e \\u043c\\u043d\\u043e\\u0436\\u0435\\u0441\\u0442\\u0432\\u043e \\u043b\\u044e\\u00a3\\u0434\\u0435\\u0439, \\u043a\\u0430\\u0436\\u0434\\u044b\\u0439 \\u0438\\u0437 \\u043a\\u043e\\u00bf\\u0442\\u043e\\u0440\\u044b\\u0445 \\u0431\\u044b\\u043b \\u0437\\u0430\\u043d\\u044f\\u0442 \\u0441\\u0432\\u043e\\u0438\\u043c\\u0438 \\u0434\\u0435\\u043b\\u0430\\u043c\\u0438. \\u0421\\u0440\\u00bf\\u0435\\u0434\\u0438 \\u044d\\u0442\\u043e\\u0439 \\u0441\\u0443\\u0435\\u0442\\u044b, \\u0418\\u0432\\u0430\\u00a3\\u043d, \\u043c\\u043e\\u043b\\u043e\\u0434\\u043e\\u0439 \\u0447\\u0435\\u043b\\u043e\\u0432\\u0435\\u043a \\u0441 \\u0431\\u043e\\u043b\\u044c\\u0448\\u0438\\u043c\\u0438 \\u043c\\u0435\\u0447\\u0442\\u0430\\u043c\\u0438, \\u043f\\u044b\\u0442\\u0430\\u043b\\u0441\\u044f \\u043d\\u0430\\u0439\\u0442\\u0438 \\u0441\\u0432\\u043e\\u0435 \\u043c\\u0435\\u0441\\u0442\\u043e. \\u0415\\u0433\\u043e \\u0434\\u0440\\u0443\\u0437\\u044c\\u044f, \\u0421\\u0435\\u0440\\u0433\\u0435\\u0439 \\u0438 \\u041c\\u0430\\u0440\\u00a1\\u0438\\u044f, \\u0432\\u0441\\u0435\\u0433\\u0434\\u0430 \\u0431\\u044b\\u043b\\u0438 \\u0440\\u044f\\u0434\\u043e\\u043c, \\u043f\\u043e\\u0434\\u0434\\u0435\\u0440\\u0436\\u0438\\u0432\\u0430\\u044f \\u0435\\u0433\\u043e \\u0432\\u043e \\u0432\\u0441\\u0435\\u0445 \\u043d\\u0430\\u0447\\u0438\\u043d\\u0430\\u043d\\u0438\\u044f\\u0445. \\u041e\\u043d\\u0438 \\u0432\\u0435\\u0440\\u0438\\u043b\\u0438 \\u0432 \\u043d\\u0435\\u0433\\u043e, \\u043a\\u043e\\u0433\\u0434\\u0430 \\u043d\\u0438\\u043a\\u0442\\u043e \\u0434\\u0440\\u0443\\u0433\\u043e\\u0439 \\u043d\\u0435 \\u0432\\u0435\\u0440\\u0438\\u043b. 
\\u041d\\u043e \\u0436\\u0438\\u0437\\u043d\\u044c \\u0432 \\u0433\\u043e\\u0440\\u043e\\u0434\\u0435 \\u0431\\u044b\\u043b\\u0430 \\u043d\\u0435\\u043f\\u0440\\u043e\\u0441\\u0442\\u043e\\u0439. \\u041a\\u043e\\u043d\\u043a\\u0443\\u0440\\u0435\\u043d\\u0446\\u0438\\u044f \\u0431\\u044b\\u043b\\u0430 \\u043e\\u0436\\u0435\\u0441\\u0442\\u043e\\u0447\\u0435\\u043d\\u043d\\u043e\\u0439, \\u0430 \\u0431\\u043e\\u0440\\u044c\\u0431\\u0430 - \\u0440\\u0435\\u0430\\u043b\\u044c\\u043d\\u043e\\u0439. \\u041d\\u0435\\u0441\\u043c\\u043e\\u0442\\u0440\\u044f \\u043d\\u0430 \\u0432\\u0441\\u0435 \\u0442\\u0440\\u0443\\u0434\\u043d\\u043e\\u0441\\u0442\\u0438, \\u0418\\u0432\\u0430\\u043d \\u0431\\u044b\\u043b \\u0440\\u0435\\u0448\\u0435\\u043d \\u0441\\u0434\\u0435\\u043b\\u0430\\u0442\\u044c \\u0432\\u0441\\u0435 \\u0432\\u043e\\u0437\\u043c\\u043e\\u0436\\u043d\\u043e\\u0435, \\u0447\\u0442\\u043e\\u0431\\u00a3\\u044b \\u0434\\u043e\\u0441\\u0442\\u0438\\u0447\\u044c \\u0441\\u0432\\u043e\\u0438\\u0445 \\u0446\\u0435\\u043b\\u0435\\u0439. \\u041e\\u043d \\u0437\\u043d\\u0430\\u043b, \\u0447\\u0442\\u043e \\u0435\\u043c\\u0443 \\u043f\\u0440\\u0438\\u0434\\u0435\\u0442\\u0441\\u044f \\u043c\\u043d\\u043e\\u0433\\u043e \\u0440\\u0430\\u0431\\u043e\\u0442\\u0430\\u0442\\u044c \\u0438 \\u043d\\u0438\\u043a\\u043e\\u0433\\u0434\\u0430 \\u043d\\u0435 \\u0441\\u0434\\u0430\\u0432\\u0430\\u0442\\u044c\\u0441\\u044f. 
\\u0415\\u0433\\u043e \\u043f\\u0443\\u0442\\u044c \\u0431\\u044b\\u043b \\u043d\\u0435\\u043f\\u0440\\u043e\\u0441\\u0442, \\u043d\\u043e \\u0431\\u043b\\u0430\\u0433\\u043e\\u0434\\u0430\\u0440\\u044f \\u043f\\u043e\\u0434\\u0434\\u0435\\u0440\\u0436\\u043a\\u0435 \\u0434\\u0440\\u0443\\u0437\\u0435\\u0439 \\u0438 \\u043d\\u0435\\u043f\\u043e\\u043a\\u043e\\u043b\\u0435\\u0431\\u0438\\u043c\\u043e\\u0439 \\u0440\\u0435\\u0448\\u0438\\u043c\\u043e\\u0441\\u0442\\u0438, \\u043e\\u043d \\u0441\\u043c\\u043e\\u0433 \\u043f\\u0440\\u0435\\u043e\\u0434\\u043e\\u043b\\u0435\\u0442\\u044c \\u0432\\u0441\\u0435 \\u043f\\u0440\\u0435\\u043f\\u044f\\u0442\\u0441\\u0442\\u0432\\u0438\\u044f \\u0438 \\u0434\\u00a1\\u043e\\u0441\\u0442\\u0438\\u0447\\u044c \\u0441\\u0432\\u043e\\u0438\\u0445 \\u043c\\u0435\\u0447\\u0442.\",\n          \"En un peque\\u00f1o pueblo situado entre la\\u00a3s m\\u00bfonta\\u00f1as y el mar, viv\\u00eda una chica llamada Maria. Era conocida por su bondad y su amor por la naturaleza. Su vida estaba llena de personajes interesantes: el viejo pescador Juan, que le contaba historias del mar, el joven Tom\\u00e1s, que se un\\u00eda a ella en sus aventuras, y su mejor amiga Emilia, que siempre estaba all\\u00ed\\u00a1 para ella. Juntos, vivieron muchas aventuras, y cada d\\u00eda era un nuevo descubrimiento para ellos\\u00a3.\",\n          \"The sun was setting, casting long shadows over the small town. Jo\\u00bfhn, a middle-aged man with a heart full of dreams, was sitting on the porch of his old house. His friends, Peter and Mary, were there with him, sharing stories of their youth. They were all from the same tow\\u00a1n, and their lives were intertwined in ways they could never have imagined. Peter, a burly man with a booming voice, was the town's blacksmith. He was a man of few words, but his actions spoke volumes. Mary, on the other hand, was the town's schoolteacher. She was a woman of great wisdom and kindness, always ready to lend a helping hand. 
As they s\\u00a1at there, reminiscing about the past, a sense of nostalgia washed over them. They remembered the days when they were young and full of hope, when the world seemed full of possibilities. But now, as they looked at the setting sun, they realized that those days were long gone. John, however, was not ready to give up. He still had dreams, dreams that he was determined to fulfill. He knew that it would not be easy, but he was ready to face whatever challenges came his way. With Peter and Mary by his side, he knew that he could achieve anything.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"cleaned_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 13,\n        \"samples\": [\n          \"\\u0412 \\u0433\\u043e\\u0440\\u043e\\u0434\\u0435 \\u0431\\u044b\\u043b\\u043e \\u043c\\u043d\\u043e\\u0436\\u0435\\u0441\\u0442\\u0432\\u043e \\u043b\\u044e\\u0434\\u0435\\u0439, \\u043a\\u0430\\u0436\\u0434\\u044b\\u0439 \\u0438\\u0437 \\u043a\\u043e\\u0442\\u043e\\u0440\\u044b\\u0445 \\u0431\\u044b\\u043b \\u0437\\u0430\\u043d\\u044f\\u0442 \\u0441\\u0432\\u043e\\u0438\\u043c\\u0438 \\u0434\\u0435\\u043b\\u0430\\u043c\\u0438. \\u0421\\u0440\\u0435\\u0434\\u0438 \\u044d\\u0442\\u043e\\u0439 \\u0441\\u0443\\u0435\\u0442\\u044b, \\u0418\\u0432\\u0430\\u043d, \\u043c\\u043e\\u043b\\u043e\\u0434\\u043e\\u0439 \\u0447\\u0435\\u043b\\u043e\\u0432\\u0435\\u043a \\u0441 \\u0431\\u043e\\u043b\\u044c\\u0448\\u0438\\u043c\\u0438 \\u043c\\u0435\\u0447\\u0442\\u0430\\u043c\\u0438, \\u043f\\u044b\\u0442\\u0430\\u043b\\u0441\\u044f \\u043d\\u0430\\u0439\\u0442\\u0438 \\u0441\\u0432\\u043e\\u0435 \\u043c\\u0435\\u0441\\u0442\\u043e. 
\\u0415\\u0433\\u043e \\u0434\\u0440\\u0443\\u0437\\u044c\\u044f, \\u0421\\u0435\\u0440\\u0433\\u0435\\u0439 \\u0438 \\u041c\\u0430\\u0440\\u0438\\u044f, \\u0432\\u0441\\u0435\\u0433\\u0434\\u0430 \\u0431\\u044b\\u043b\\u0438 \\u0440\\u044f\\u0434\\u043e\\u043c, \\u043f\\u043e\\u0434\\u0434\\u0435\\u0440\\u0436\\u0438\\u0432\\u0430\\u044f \\u0435\\u0433\\u043e \\u0432\\u043e \\u0432\\u0441\\u0435\\u0445 \\u043d\\u0430\\u0447\\u0438\\u043d\\u0430\\u043d\\u0438\\u044f\\u0445. \\u041e\\u043d\\u0438 \\u0432\\u0435\\u0440\\u0438\\u043b\\u0438 \\u0432 \\u043d\\u0435\\u0433\\u043e, \\u043a\\u043e\\u0433\\u0434\\u0430 \\u043d\\u0438\\u043a\\u0442\\u043e \\u0434\\u0440\\u0443\\u0433\\u043e\\u0439 \\u043d\\u0435 \\u0432\\u0435\\u0440\\u0438\\u043b. \\u041d\\u043e \\u0436\\u0438\\u0437\\u043d\\u044c \\u0432 \\u0433\\u043e\\u0440\\u043e\\u0434\\u0435 \\u0431\\u044b\\u043b\\u0430 \\u043d\\u0435\\u043f\\u0440\\u043e\\u0441\\u0442\\u043e\\u0439. \\u041a\\u043e\\u043d\\u043a\\u0443\\u0440\\u0435\\u043d\\u0446\\u0438\\u044f \\u0431\\u044b\\u043b\\u0430 \\u043e\\u0436\\u0435\\u0441\\u0442\\u043e\\u0447\\u0435\\u043d\\u043d\\u043e\\u0439, \\u0430 \\u0431\\u043e\\u0440\\u044c\\u0431\\u0430 - \\u0440\\u0435\\u0430\\u043b\\u044c\\u043d\\u043e\\u0439. \\u041d\\u0435\\u0441\\u043c\\u043e\\u0442\\u0440\\u044f \\u043d\\u0430 \\u0432\\u0441\\u0435 \\u0442\\u0440\\u0443\\u0434\\u043d\\u043e\\u0441\\u0442\\u0438, \\u0418\\u0432\\u0430\\u043d \\u0431\\u044b\\u043b \\u0440\\u0435\\u0448\\u0435\\u043d \\u0441\\u0434\\u0435\\u043b\\u0430\\u0442\\u044c \\u0432\\u0441\\u0435 \\u0432\\u043e\\u0437\\u043c\\u043e\\u0436\\u043d\\u043e\\u0435, \\u0447\\u0442\\u043e\\u0431\\u044b \\u0434\\u043e\\u0441\\u0442\\u0438\\u0447\\u044c \\u0441\\u0432\\u043e\\u0438\\u0445 \\u0446\\u0435\\u043b\\u0435\\u0439. 
\\u041e\\u043d \\u0437\\u043d\\u0430\\u043b, \\u0447\\u0442\\u043e \\u0435\\u043c\\u0443 \\u043f\\u0440\\u0438\\u0434\\u0435\\u0442\\u0441\\u044f \\u043c\\u043d\\u043e\\u0433\\u043e \\u0440\\u0430\\u0431\\u043e\\u0442\\u0430\\u0442\\u044c \\u0438 \\u043d\\u0438\\u043a\\u043e\\u0433\\u0434\\u0430 \\u043d\\u0435 \\u0441\\u0434\\u0430\\u0432\\u0430\\u0442\\u044c\\u0441\\u044f. \\u0415\\u0433\\u043e \\u043f\\u0443\\u0442\\u044c \\u0431\\u044b\\u043b \\u043d\\u0435\\u043f\\u0440\\u043e\\u0441\\u0442, \\u043d\\u043e \\u0431\\u043b\\u0430\\u0433\\u043e\\u0434\\u0430\\u0440\\u044f \\u043f\\u043e\\u0434\\u0434\\u0435\\u0440\\u0436\\u043a\\u0435 \\u0434\\u0440\\u0443\\u0437\\u0435\\u0439 \\u0438 \\u043d\\u0435\\u043f\\u043e\\u043a\\u043e\\u043b\\u0435\\u0431\\u0438\\u043c\\u043e\\u0439 \\u0440\\u0435\\u0448\\u0438\\u043c\\u043e\\u0441\\u0442\\u0438, \\u043e\\u043d \\u0441\\u043c\\u043e\\u0433 \\u043f\\u0440\\u0435\\u043e\\u0434\\u043e\\u043b\\u0435\\u0442\\u044c \\u0432\\u0441\\u0435 \\u043f\\u0440\\u0435\\u043f\\u044f\\u0442\\u0441\\u0442\\u0432\\u0438\\u044f \\u0438 \\u0434\\u043e\\u0441\\u0442\\u0438\\u0447\\u044c \\u0441\\u0432\\u043e\\u0438\\u0445 \\u043c\\u0435\\u0447\\u0442.\",\n          \"En un peque\\u00f1o pueblo situado entre las monta\\u00f1as y el mar, viv\\u00eda una chica llamada Maria. Era conocida por su bondad y su amor por la naturaleza. Su vida estaba llena de personajes interesantes: el viejo pescador Juan, que le contaba historias del mar, el joven Tom\\u00e1s, que se un\\u00eda a ella en sus aventuras, y su mejor amiga Emilia, que siempre estaba all\\u00ed para ella. Juntos, vivieron muchas aventuras, y cada d\\u00eda era un nuevo descubrimiento para ellos.\",\n          \"The sun was setting, casting long shadows over the small town. John, a middle-aged man with a heart full of dreams, was sitting on the porch of his old house. His friends, Peter and Mary, were there with him, sharing stories of their youth. 
They were all from the same town, and their lives were intertwined in ways they could never have imagined. Peter, a burly man with a booming voice, was the town's blacksmith. He was a man of few words, but his actions spoke volumes. Mary, on the other hand, was the town's schoolteacher. She was a woman of great wisdom and kindness, always ready to lend a helping hand. As they sat there, reminiscing about the past, a sense of nostalgia washed over them. They remembered the days when they were young and full of hope, when the world seemed full of possibilities. But now, as they looked at the setting sun, they realized that those days were long gone. John, however, was not ready to give up. He still had dreams, dreams that he was determined to fulfill. He knew that it would not be easy, but he was ready to face whatever challenges came his way. With Peter and Mary by his side, he knew that he could achieve anything.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-cba5b5bb-5d6b-40be-8e7d-be4fc3d80eae\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>raw_text</th>\n",
              "      <th>cleaned_text</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>The sun was setting, casting long shadows over...</td>\n",
              "      <td>The sun was setting, casting long shadows over...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Le soleil se couchait, jetant de longues ombre...</td>\n",
              "      <td>Le soleil se couchait, jetant de longues ombre...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>El sol se estaba poniendo, proyectando largas ...</td>\n",
              "      <td>El sol se estaba poniendo, proyectando largas ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>La ciudad estaba llena de vida, sus calles lle...</td>\n",
              "      <td>La ciudad estaba llena de vida, sus calles lle...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>La ville était pleine de vie, ses rues remplie...</td>\n",
              "      <td>La ville était pleine de vie, ses rues remplie...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Die Stadt¿ war voller Leben, ihre St£raß¿en ge...</td>\n",
              "      <td>Die Stadt war voller Leben, ihre Straßen gefül...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Die Sonne g¿ing unter und warf lange Schat£ten...</td>\n",
              "      <td>Die Sonne ging unter und warf lange Schatten ü...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>В тихом уголке старого города, где узкие уло¿ч...</td>\n",
              "      <td>В тихом уголке старого города, где узкие улочк...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>In a small town nestled between the mountains ...</td>\n",
              "      <td>In a small town nestled between the mountains ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>En un pequeño pueblo situado entre la£s m¿onta...</td>\n",
              "      <td>En un pequeño pueblo situado entre las montaña...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>Dans un petit village niché entre les montagne...</td>\n",
              "      <td>Dans un petit village niché entre les montagne...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>В городе было множество лю£дей, каждый из ко¿т...</td>\n",
              "      <td>В городе было множество людей, каждый из котор...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>£La città era piena di vita, le strade piene d...</td>\n",
              "      <td>La città era piena di vita, le strade piene de...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cba5b5bb-5d6b-40be-8e7d-be4fc3d80eae')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-cba5b5bb-5d6b-40be-8e7d-be4fc3d80eae button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-cba5b5bb-5d6b-40be-8e7d-be4fc3d80eae');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-5840d9ce-1491-4f8d-8ae3-99ef7bb121d0\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-5840d9ce-1491-4f8d-8ae3-99ef7bb121d0')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-5840d9ce-1491-4f8d-8ae3-99ef7bb121d0 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                                             raw_text  \\\n",
              "0   The sun was setting, casting long shadows over...   \n",
              "1   Le soleil se couchait, jetant de longues ombre...   \n",
              "2   El sol se estaba poniendo, proyectando largas ...   \n",
              "3   La ciudad estaba llena de vida, sus calles lle...   \n",
              "4   La ville était pleine de vie, ses rues remplie...   \n",
              "5   Die Stadt¿ war voller Leben, ihre St£raß¿en ge...   \n",
              "6   Die Sonne g¿ing unter und warf lange Schat£ten...   \n",
              "7   В тихом уголке старого города, где узкие уло¿ч...   \n",
              "8   In a small town nestled between the mountains ...   \n",
              "9   En un pequeño pueblo situado entre la£s m¿onta...   \n",
              "10  Dans un petit village niché entre les montagne...   \n",
              "11  В городе было множество лю£дей, каждый из ко¿т...   \n",
              "12  £La città era piena di vita, le strade piene d...   \n",
              "\n",
              "                                         cleaned_text  \n",
              "0   The sun was setting, casting long shadows over...  \n",
              "1   Le soleil se couchait, jetant de longues ombre...  \n",
              "2   El sol se estaba poniendo, proyectando largas ...  \n",
              "3   La ciudad estaba llena de vida, sus calles lle...  \n",
              "4   La ville était pleine de vie, ses rues remplie...  \n",
              "5   Die Stadt war voller Leben, ihre Straßen gefül...  \n",
              "6   Die Sonne ging unter und warf lange Schatten ü...  \n",
              "7   В тихом уголке старого города, где узкие улочк...  \n",
              "8   In a small town nestled between the mountains ...  \n",
              "9   En un pequeño pueblo situado entre las montaña...  \n",
              "10  Dans un petit village niché entre les montagne...  \n",
              "11  В городе было множество людей, каждый из котор...  \n",
              "12  La città era piena di vita, le strade piene de...  "
            ]
          },
          "execution_count": 7,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "df"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6N_PzR2nOOml"
      },
      "source": [
        "Будем просить модель определять язык текста и имя главного персонажа и возвращать ответ в виде словаря. Для этого создадим Output parser, с которым вы уже познакомились в прошлых уроках."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rAGYgfybYO81"
      },
      "outputs": [],
      "source": [
        "# Each ResponseSchema describes one key of the dictionary the model must return:\n",
        "# `name` is the key, `description` tells the model what to put there and\n",
        "# `type` is the expected value type. ResponseSchema has no `fields` argument,\n",
        "# so only these three documented attributes are passed.\n",
        "\n",
        "# Schema for the language of the text\n",
        "language_schema = ResponseSchema(\n",
        "    name=\"language_detection\",\n",
        "    description=\"Detect the language of the text\",\n",
        "    type=\"string\",\n",
        ")\n",
        "\n",
        "# Schema for the main character of the text\n",
        "person_schema = ResponseSchema(\n",
        "    name=\"main_character\",\n",
        "    description=\"Identify the main character in the text\",\n",
        "    type=\"string\",\n",
        ")\n",
        "\n",
        "response_schemas = [language_schema, person_schema]\n",
        "\n",
        "# Build the output parser from the list of schemas\n",
        "# NOTE(review): the parsed dictionary is keyed by the schema names above\n",
        "# (`language_detection`, `main_character`) - confirm the consuming cell reads these keys.\n",
        "output_parser = StructuredOutputParser.from_response_schemas(response_schemas)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "background_save": true
        },
        "id": "jXjGux0x4Qtb"
      },
      "outputs": [],
      "source": [
        "# Get the response-formatting instructions from the output parser\n",
        "format_instructions = output_parser.get_format_instructions()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JM8ulzQTOUYc"
      },
      "source": [
        "Напишем шаблон промпта с нашим вопросом и инструкциями по форматированию ответа. Будем передавать в этот промпт сырой текст."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "4qVrpYz-lDdY"
      },
      "outputs": [],
      "source": [
        "# Prompt template: `text` is supplied on every call, while the parser's\n",
        "# formatting instructions are bound once through `partial_variables`,\n",
        "# so callers only have to pass the text to analyze.\n",
        "prompt_template = PromptTemplate(\n",
        "    template='''Please analyze the text: '{text}'. What is the language of the text? Who is the main character mentioned in the text? {format_instructions}''',\n",
        "    input_variables=['text'],\n",
        "    partial_variables={'format_instructions': format_instructions},\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "S95kFg08OcNF"
      },
      "source": [
        "Создадим цепочку с помощью `LLMChain`, объединив модель, шаблон промпта и парсер ответа.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "GGcsjFnnweLf"
      },
      "outputs": [],
      "source": [
        "# Assemble the chain from its three parts: the LLM, the prompt template and the output parser\n",
        "chain = LLMChain(\n",
        "    llm=llm,\n",
        "    prompt=prompt_template,\n",
        "    output_parser=output_parser,\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-Zap73ZIbtX3",
        "outputId": "9cd9f7d1-e43d-4034-d5cc-60f341d2a32f"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "  8%|▊         | 1/13 [00:01<00:19,  1.61s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 15%|█▌        | 2/13 [00:02<00:15,  1.37s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 23%|██▎       | 3/13 [00:04<00:14,  1.40s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 31%|███       | 4/13 [00:05<00:13,  1.45s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 38%|███▊      | 5/13 [00:07<00:10,  1.37s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 46%|████▌     | 6/13 [00:08<00:09,  1.33s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 54%|█████▍    | 7/13 [00:09<00:07,  1.32s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 62%|██████▏   | 8/13 [00:10<00:06,  1.32s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 69%|██████▉   | 9/13 [00:13<00:06,  1.66s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 77%|███████▋  | 10/13 [00:14<00:04,  1.53s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 85%|████████▍ | 11/13 [00:15<00:02,  1.40s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\r 92%|█████████▏| 12/13 [00:16<00:01,  1.36s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "100%|██████████| 13/13 [00:18<00:00,  1.39s/it]"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Ошибка при обработке текста: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)\n",
            "                                             raw_text language main_character\n",
            "0   The sun was setting, casting long shadows over...    error          error\n",
            "1   Le soleil se couchait, jetant de longues ombre...    error          error\n",
            "2   El sol se estaba poniendo, proyectando largas ...    error          error\n",
            "3   La ciudad estaba llena de vida, sus calles lle...    error          error\n",
            "4   La ville était pleine de vie, ses rues remplie...    error          error\n",
            "5   Die Stadt¿ war voller Leben, ihre St£raß¿en ge...    error          error\n",
            "6   Die Sonne g¿ing unter und warf lange Schat£ten...    error          error\n",
            "7   В тихом уголке старого города, где узкие уло¿ч...    error          error\n",
            "8   In a small town nestled between the mountains ...    error          error\n",
            "9   En un pequeño pueblo situado entre la£s m¿onta...    error          error\n",
            "10  Dans un petit village niché entre les montagne...    error          error\n",
            "11  В городе было множество лю£дей, каждый из ко¿т...    error          error\n",
            "12  £La città era piena di vita, le strade piene d...    error          error\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\n"
          ]
        }
      ],
      "source": [
        "from tqdm import tqdm\n",
        "import pandas as pd\n",
        "import json\n",
        "\n",
        "# Предполагаем, что df уже загружен и содержит колонку 'cleaned_text'\n",
        "df['language'] = \"\"\n",
        "df['main_character'] = \"\"\n",
        "\n",
        "for index, row in tqdm(df.iterrows(), total=df.shape[0]):\n",
        "    text = row['cleaned_text']\n",
        "    if not text.strip():  # Добавляем проверку на пустой текст\n",
        "        df.at[index, 'language'] = 'empty'\n",
        "        df.at[index, 'main_character'] = 'empty'\n",
        "        continue\n",
        "    try:\n",
        "        result = chain.invoke(input={\"text\": text})\n",
        "        if isinstance(result, str):  # Проверяем, является ли результат строкой\n",
        "            try:\n",
        "                result = json.loads(result)  # Пытаемся преобразовать строку в JSON\n",
        "            except json.JSONDecodeError:\n",
        "                raise ValueError(\"Received response is not a valid JSON\")\n",
        "\n",
        "        # Проверяем, является ли результат словарём после возможного преобразования\n",
        "        if not isinstance(result, dict):\n",
        "            raise ValueError(\"Result is not a valid JSON object\")\n",
        "\n",
        "        df.at[index, 'language'] = result.get('language', 'unknown')\n",
        "        df.at[index, 'main_character'] = result.get('main_character', 'unknown')\n",
        "    except Exception as e:\n",
        "        print(f\"Ошибка при обработке текста: {e}\")\n",
        "        df.at[index, 'language'] = 'error'\n",
        "        df.at[index, 'main_character'] = 'error'\n",
        "\n",
        "print(df[['raw_text', 'language', 'main_character']])\n"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}