diff --git a/README.md b/README.md
index 223030f..999117d 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ To quickly learn how to run cleanlab on your own data, first check out the [quic
 | 21  | [non_iid_detection](non_iid_detection/non_iid_detection.ipynb)  | Use Datalab to detect non-IID sampling (e.g. drift) in datasets based on numeric features or embeddings. |
 | 22  | [object_detection](object_detection/README.md)  | Train Detectron2 object detection model for use with cleanlab. |
 | 23  | [semantic segmentation](segmentation/training_ResNeXt50_for_Semantic_Segmentation_on_SYNTHIA.ipynb)  | Train ResNeXt semantic segmentation model for use with cleanlab. |
+| 24  | [llm_evals_w_crowdlab](llm_evals_w_crowdlab/llm_evals_w_crowdlab.ipynb)  | Use GPT-4o and CROWDLAB to evaluate language models on a dataset labeled by multiple annotators. |
 
 
 ## Instructions
diff --git a/llm_evals_w_crowdlab/llm_evals_w_crowdlab.ipynb b/llm_evals_w_crowdlab/llm_evals_w_crowdlab.ipynb
new file mode 100644
index 0000000..2a40c7f
--- /dev/null
+++ b/llm_evals_w_crowdlab/llm_evals_w_crowdlab.ipynb
@@ -0,0 +1,7072 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "view-in-github"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/gist/nelsonauner/e81daa4c306ed111e2ed224b7cc715f2/cleanlab-crowdlab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "dc2395c9-877b-44cc-97aa-715f133ab5fb",
+      "metadata": {
+        "id": "dc2395c9-877b-44cc-97aa-715f133ab5fb"
+      },
+      "source": [
+        "# Step 1: Data Cleaning and Exploration\n",
+        "\n",
+        "Let's get into it!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "id": "CsmTkzQfFXSt",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "CsmTkzQfFXSt",
+        "outputId": "c0d25460-3c21-4259-ed45-895a8bd9a34d"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.20.0)\n",
+            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.0.3)\n",
+            "Requirement already satisfied: openai in /usr/local/lib/python3.10/dist-packages (1.35.6)\n",
+            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.25.2)\n",
+            "Requirement already satisfied: cleanlab in /usr/local/lib/python3.10/dist-packages (2.6.6)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.15.3)\n",
+            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (16.1.0)\n",
+            "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n",
+            "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n",
+            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n",
+            "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.4)\n",
+            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n",
+            "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n",
+            "Requirement already satisfied: fsspec[http]<=2024.5.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
+            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.5)\n",
+            "Requirement already satisfied: huggingface-hub>=0.21.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.23.4)\n",
+            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.1)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
+            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
+            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
+            "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1)\n",
+            "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n",
+            "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n",
+            "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai) (0.27.0)\n",
+            "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai) (2.7.4)\n",
+            "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.1)\n",
+            "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai) (4.12.2)\n",
+            "Requirement already satisfied: scikit-learn>=1.1 in /usr/local/lib/python3.10/dist-packages (from cleanlab) (1.2.2)\n",
+            "Requirement already satisfied: termcolor>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from cleanlab) (2.4.0)\n",
+            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
+            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.1)\n",
+            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n",
+            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
+            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n",
+            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n",
+            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
+            "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2024.6.2)\n",
+            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (1.0.5)\n",
+            "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n",
+            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
+            "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai) (2.18.4)\n",
+            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.3.2)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.0.7)\n",
+            "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->cleanlab) (1.11.4)\n",
+            "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->cleanlab) (1.4.2)\n",
+            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->cleanlab) (3.5.0)\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install datasets pandas openai numpy cleanlab"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "id": "7d20bb0a-2e10-407a-a196-86cd8178d8be",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 281,
+          "referenced_widgets": [
+            "4b42498db3a04532b8189257528b2148",
+            "81c98be02f9f41d682c00b2344f5682b",
+            "6ee9ae4b7d2f4c788c2208f7696cc8db",
+            "e005079ce16649c48cd9b38f05d5d24d",
+            "1b2e5ab1ca454f78be34f6e4f2da7e88",
+            "e19558f2eb3d49328a0d4cc800a0b4ad",
+            "5bbd468db6a8451d91ce8b833e4b38d6",
+            "69914ef99c5d44b48690888610766e60",
+            "08db9645f6714d1faaee229c6f20e39d",
+            "d4a3531f15084bd4ab0b4c05c35425d9",
+            "eeb1766106b84da7a41bf86de2597cbb",
+            "088be0b3fd9c45b4b46d42dce24d74db",
+            "dbc1a1892b984847b87215c43e77e6df",
+            "eaff1c432bc64bb09d48abf1c54a0c19",
+            "f8cdc7f86a90420d90a5e186d57d7c73",
+            "117d5fb668ad430c90cb66f373ac4c67",
+            "d76a9a5b57874f8aad5398ce749f9886",
+            "d356a48df95a4177bb5a6355d61df202",
+            "10efe147ad7e4d8099fe7c5c78896e01",
+            "552f2c362f6b46958dae49c6c21c3c1d",
+            "c72e9abef718441baf1ae313c8e5dbf9",
+            "f0900425ee8e46beb75d33697c80bddd",
+            "8ed4dc8f8c074d7a840c6b02687d6d9c",
+            "350f203cc54842618a7a408374c470f1",
+            "c8cf37674d284ed88148a96671b20b65",
+            "079434f2d9164264a8408aaf28ee6db9",
+            "3464f8ec3901488b8799bc71f22083ef",
+            "eb5b50da89e944dabb0bbadb76ccc447",
+            "3f35927ec50a49e1bb81cb6d46d9bf7f",
+            "51339d5fe66b4d85a13c1ef9ad062776",
+            "a8d6995b42df46959b1c55267f18c6e4",
+            "af16803419b34f7ab15ac8b33d6dadfb",
+            "79ab684f0c9c4ad5afbb137d8d4693c0",
+            "13128542a16446bbbe00ea10ef901cbb",
+            "ef7ecc82f9484316b9f3f5b05332f5c7",
+            "e2e9470eff1448ecb13922a0762c4f62",
+            "fe87dbdb92dc43f7963f9b9a87a08df0",
+            "fc41d9d6971b4f7cafa4247a82d59dec",
+            "5ed508b45ed140e692f2f9d0a446c9f0",
+            "282ed582df3840099d1c7c2ae9e8940e",
+            "435196039a964bbbbdb1b8b498ad9a88",
+            "e601948125fe411fb61fb97bacfc275f",
+            "f3ce70515e5b4f648e0472aa0cce5bf3",
+            "21de204374fe47f9872f70b32c0e174c",
+            "4165bcffeb484d1f8b10728a55ac64fe",
+            "7a9272c23aa745d4b2bc073e72e18cc3",
+            "ff70ea10be5d40b59bd357afe599e6bf",
+            "896a952d146a4bc9ad2b035828df470a",
+            "716e16fe466a4cd4a2f815101816c3cd",
+            "05d5ef9d6f65409ab584a88cb8d0461b",
+            "a88c99216ab8471786687f980b039be7",
+            "896e188c5b3c45f1b8488abefc79b733",
+            "0593582beaa34d4b87282fa14276585c",
+            "58aff3879a4f4858b170a214b3f74cc6",
+            "c3d846a22d684009994bf4592bb43922"
+          ]
+        },
+        "id": "7d20bb0a-2e10-407a-a196-86cd8178d8be",
+        "outputId": "fed598ed-1e51-4656-a9e2-cd2bebc0dbc7"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "4b42498db3a04532b8189257528b2148",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading readme:   0%|          | 0.00/2.00k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "088be0b3fd9c45b4b46d42dce24d74db",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading data:   0%|          | 0.00/650k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "8ed4dc8f8c074d7a840c6b02687d6d9c",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading data:   0%|          | 0.00/739k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "13128542a16446bbbe00ea10ef901cbb",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Generating gpt4_pair split:   0%|          | 0/2400 [00:00<?, ? examples/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "4165bcffeb484d1f8b10728a55ac64fe",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Generating human split:   0%|          | 0/3355 [00:00<?, ? examples/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        }
+      ],
+      "source": [
+        "from datasets import load_dataset\n",
+        "\n",
+        "dataset = load_dataset(\"lmsys/mt_bench_human_judgments\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "id": "b48aba43-3e7d-4fcf-8759-6f670038c8c0",
+      "metadata": {
+        "id": "b48aba43-3e7d-4fcf-8759-6f670038c8c0"
+      },
+      "outputs": [],
+      "source": [
+        "import argparse\n",
+        "import json\n",
+        "import os\n",
+        "\n",
+        "import numpy as np\n",
+        "from datasets import load_dataset\n",
+        "import pandas as pd\n",
+        "\n",
+        "from cleanlab.multiannotator import get_label_quality_multiannotator, get_majority_vote_label"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "id": "49903edf-a05c-486a-a75a-dad403199d4f",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "49903edf-a05c-486a-a75a-dad403199d4f",
+        "outputId": "7cc5bbee-2b33-4f4d-8aec-1b33c47e3b80"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "DatasetDict({\n",
+              "    gpt4_pair: Dataset({\n",
+              "        features: ['question_id', 'model_a', 'model_b', 'winner', 'judge', 'conversation_a', 'conversation_b', 'turn'],\n",
+              "        num_rows: 2400\n",
+              "    })\n",
+              "    human: Dataset({\n",
+              "        features: ['question_id', 'model_a', 'model_b', 'winner', 'judge', 'conversation_a', 'conversation_b', 'turn'],\n",
+              "        num_rows: 3355\n",
+              "    })\n",
+              "})"
+            ]
+          },
+          "execution_count": 5,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "id": "da4114da-51ea-4d5d-aa78-a25156935613",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 98,
+          "referenced_widgets": [
+            "fd3dcf7c89cf4f4aae7095ec921ad53a",
+            "f8be2158142447f19bba5889357abc1c",
+            "9fb5588c5f7d418691fc5ce815d42383",
+            "aa9e5107279d46fca40c992b03102c52",
+            "6dd5334fcc8648ed8a72164899335a13",
+            "8d78f2ed550d41418e7854c384c52463",
+            "1f276ec1e4b84f599200bca5c14a1f61",
+            "403c363ff9ce44cb9686259db0fe31e6",
+            "ec5c3cee541e4144976c3e64aa6e69ce",
+            "55853062fa1c4567b54e8202dc3ea9fa",
+            "5cde2bf3bce545008b811be5afb68262",
+            "c5dad19757724fb8a5f95069dcb282a4",
+            "0dd58e70dc2c483b94aa209848f829f0",
+            "83b32b1b16fc47fabc0a6cdb80d393e9",
+            "fb0aec799c1a40f9a9b148921a6421fa",
+            "016c4f9c04b74ebbbbbc7f911adfcd42",
+            "86f076064f1e4f4bbf8512f63b7ec1d9",
+            "47c35d402c76496396356823e7e5529f",
+            "51e3fabc729d4eb59e4d0409a93ab4bb",
+            "b0c7ecd44ccd490f94e33182f59d9fcb",
+            "dd3a3867381b494aa04e301c66c102d1",
+            "20a5a2d7eb7c4d9bbc04a72188facdd4"
+          ]
+        },
+        "id": "da4114da-51ea-4d5d-aa78-a25156935613",
+        "outputId": "18af3b29-36e3-42b1-eda8-013f0bb114ae"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "fd3dcf7c89cf4f4aae7095ec921ad53a",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Creating json from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "c5dad19757724fb8a5f95069dcb282a4",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Creating json from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/plain": [
+              "11356420"
+            ]
+          },
+          "execution_count": 6,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "dataset[\"human\"].to_json(\"human_judgments.json\")\n",
+        "dataset[\"gpt4_pair\"].to_json(\"gpt4_pair_judgments.json\")\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9e93803e-9eb7-46bb-940f-2048c255ca54",
+      "metadata": {
+        "id": "9e93803e-9eb7-46bb-940f-2048c255ca54"
+      },
+      "source": [
+        "We'll be using the \"human\" split of the dataset, in which different human judges evaluated which LLM answered each question better. This task has the additional (realistic) wrinkle that different LLM pairings have different numbers of graders."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "id": "be077aa2-8048-4e4a-b3e9-0b0d219ac6f4",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "be077aa2-8048-4e4a-b3e9-0b0d219ac6f4",
+        "outputId": "62834418-f1a0-4abc-ecb3-1f776a9df219"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "question_id  model_a          model_b          turn\n",
+            "81           alpaca-13b       gpt-3.5-turbo    1       [author_2, expert_17]\n",
+            "                                               2       [author_2, expert_17]\n",
+            "                              vicuna-13b-v1.2  1                  [expert_0]\n",
+            "                                               2                  [expert_0]\n",
+            "             claude-v1        alpaca-13b       1                 [expert_37]\n",
+            "                                                               ...          \n",
+            "160          vicuna-13b-v1.2  claude-v1        2       [expert_0, expert_36]\n",
+            "                              gpt-3.5-turbo    1       [author_4, expert_43]\n",
+            "                                               2       [author_4, expert_43]\n",
+            "                              llama-13b        1                  [expert_0]\n",
+            "                                               2                  [expert_0]\n",
+            "Name: judge, Length: 2396, dtype: object\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Group by 'question_id', 'model_a', 'model_b', and 'turn'\n",
+        "grouped = pd.DataFrame(dataset[\"human\"]).groupby(['question_id', 'model_a', 'model_b', 'turn'])\n",
+        "\n",
+        "# Aggregate to find unique judges in each group\n",
+        "unique_judges_distribution = grouped['judge'].unique()\n",
+        "\n",
+        "# Print the distribution of unique judges\n",
+        "print(unique_judges_distribution)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "79c0f659-163e-4c51-8cd1-82d95518c285",
+      "metadata": {
+        "id": "79c0f659-163e-4c51-8cd1-82d95518c285"
+      },
+      "source": [
+        "Now that we have our data, we'll do a bit of cleaning.\n",
+        "\n",
+        "\n",
+        "First, we're going to need to integer-ize some of these labels. In particular, the algorithm wants the selected label classes (in this example, `model_a` and `model_b`) to be integers, so let's do that.\n",
+        "\n",
+        "Also, this dataset is divided up by \"turns\", but for simplicity's sake, we're going to use the first turn per pairing.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "id": "dc143858-5669-41e3-aa58-b34fceb45561",
+      "metadata": {
+        "id": "dc143858-5669-41e3-aa58-b34fceb45561"
+      },
+      "outputs": [],
+      "source": [
+        "human_df = pd.DataFrame(dataset[\"human\"])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "id": "914dfaa3-d49e-4c68-b971-4f9b92e54a01",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "914dfaa3-d49e-4c68-b971-4f9b92e54a01",
+        "outputId": "a8395e31-257e-44dd-bf37-11dd0e3c2da6"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(3355, 8)"
+            ]
+          },
+          "execution_count": 9,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
+      "id": "fc4422d7-612e-4a56-83b4-8de338845c3c",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "fc4422d7-612e-4a56-83b4-8de338845c3c",
+        "outputId": "daa4fa20-81df-43c1-8261-c4d16c76bc22"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"human_df\",\n  \"rows\": 3355,\n  \"fields\": [\n    {\n      \"column\": \"question_id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 23,\n        \"min\": 81,\n        \"max\": 160,\n        \"num_unique_values\": 80,\n        \"samples\": [\n          111,\n          81,\n          103\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_a\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"alpaca-13b\",\n          \"claude-v1\",\n          \"llama-13b\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_b\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"gpt-3.5-turbo\",\n          \"vicuna-13b-v1.2\",\n          \"gpt-4\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"winner\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"model_b\",\n          \"tie\",\n          \"model_a\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"judge\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 65,\n        \"samples\": [\n          \"expert_39\",\n          \"expert_53\",\n          \"author_2\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_a\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_b\",\n   
   \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"turn\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 2,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          2,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe",
+              "variable_name": "human_df"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-05ef34f0-8f94-44bf-9a14-b8e5b1250178\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>question_id</th>\n",
+              "      <th>model_a</th>\n",
+              "      <th>model_b</th>\n",
+              "      <th>winner</th>\n",
+              "      <th>judge</th>\n",
+              "      <th>conversation_a</th>\n",
+              "      <th>conversation_b</th>\n",
+              "      <th>turn</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>author_2</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>author_2</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_17</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_17</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>vicuna-13b-v1.2</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_0</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-05ef34f0-8f94-44bf-9a14-b8e5b1250178')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-05ef34f0-8f94-44bf-9a14-b8e5b1250178 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-05ef34f0-8f94-44bf-9a14-b8e5b1250178');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-a27b7e3e-bb70-4ab5-b803-a97eb0c25432\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a27b7e3e-bb70-4ab5-b803-a97eb0c25432')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-a27b7e3e-bb70-4ab5-b803-a97eb0c25432 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "   question_id     model_a          model_b   winner      judge  \\\n",
+              "0           81  alpaca-13b    gpt-3.5-turbo  model_b   author_2   \n",
+              "1           81  alpaca-13b    gpt-3.5-turbo  model_b   author_2   \n",
+              "2           81  alpaca-13b    gpt-3.5-turbo  model_b  expert_17   \n",
+              "3           81  alpaca-13b    gpt-3.5-turbo  model_b  expert_17   \n",
+              "4           81  alpaca-13b  vicuna-13b-v1.2  model_b   expert_0   \n",
+              "\n",
+              "                                      conversation_a  \\\n",
+              "0  [{'content': 'Compose an engaging travel blog ...   \n",
+              "1  [{'content': 'Compose an engaging travel blog ...   \n",
+              "2  [{'content': 'Compose an engaging travel blog ...   \n",
+              "3  [{'content': 'Compose an engaging travel blog ...   \n",
+              "4  [{'content': 'Compose an engaging travel blog ...   \n",
+              "\n",
+              "                                      conversation_b  turn  \n",
+              "0  [{'content': 'Compose an engaging travel blog ...     1  \n",
+              "1  [{'content': 'Compose an engaging travel blog ...     2  \n",
+              "2  [{'content': 'Compose an engaging travel blog ...     1  \n",
+              "3  [{'content': 'Compose an engaging travel blog ...     2  \n",
+              "4  [{'content': 'Compose an engaging travel blog ...     1  "
+            ]
+          },
+          "execution_count": 10,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "MbocaXC9NIil",
+      "metadata": {
+        "id": "MbocaXC9NIil"
+      },
+      "source": [
+        "How many judgments did each judge contribute, and how many of them came from expert judges?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 79,
+      "id": "frHkV0YnNLnq",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "frHkV0YnNLnq",
+        "outputId": "9d05987e-21fd-4564-e3b9-07b315f77ab9"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "judge\n",
+              "expert_24    205\n",
+              "author_4     204\n",
+              "author_0     180\n",
+              "expert_0     144\n",
+              "author_3     106\n",
+              "            ... \n",
+              "expert_18     10\n",
+              "expert_54     10\n",
+              "expert_30      6\n",
+              "author_1       6\n",
+              "expert_52      4\n",
+              "Name: count, Length: 65, dtype: int64"
+            ]
+          },
+          "execution_count": 79,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df.judge.value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 80,
+      "id": "0hkfLGqQNPya",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0hkfLGqQNPya",
+        "outputId": "a8637f93-c8e7-4d75-8510-85919d443684"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "judge\n",
+              "True     2668\n",
+              "False     687\n",
+              "Name: count, dtype: int64"
+            ]
+          },
+          "execution_count": 80,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df.judge.str.contains(\"expert\").value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 11,
+      "id": "c95483f4-1c7f-4b0a-a221-17535a0bde8c",
+      "metadata": {
+        "id": "c95483f4-1c7f-4b0a-a221-17535a0bde8c"
+      },
+      "outputs": [],
+      "source": [
+        "# Encode winner labels as integers: model_a -> 0, model_b -> 1 (any other label maps to None)\n",
+        "mapping_dict = dict(model_a=0, model_b=1)\n",
+        "human_df['winner_binary'] = human_df['winner'].apply(lambda s: mapping_dict.get(s))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7gXd1GlkInb5",
+      "metadata": {
+        "id": "7gXd1GlkInb5"
+      },
+      "source": [
+        "We also transform the dataset from long to wide using `pivot_table`, so that each row is a (question, model pair) combination and each column holds one judge's vote:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 12,
+      "id": "949ad93a-6a2e-4572-a5e3-03ebc48f0e59",
+      "metadata": {
+        "id": "949ad93a-6a2e-4572-a5e3-03ebc48f0e59"
+      },
+      "outputs": [],
+      "source": [
+        "human_df_per_judge = human_df.pivot_table(\n",
+        "    index=['question_id', 'model_a', 'model_b'],\n",
+        "    columns='judge',\n",
+        "    values=['winner_binary'],\n",
+        "    aggfunc='first'  # a judge can vote on the same pair once per turn; keep only the first non-missing vote\n",
+        ")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 13,
+      "id": "4589e1e2-1319-45ed-b15e-7f337dded644",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 405
+        },
+        "id": "4589e1e2-1319-45ed-b15e-7f337dded644",
+        "outputId": "88cef2e9-6dbd-4d49-9b9d-fbba1ae212a2"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "human_df_per_judge"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-762e61fa-cb60-4006-91dc-c3ab9c74ce8e\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead tr th {\n",
+              "        text-align: left;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead tr:last-of-type th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th colspan=\"21\" halign=\"left\">winner_binary</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>judge</th>\n",
+              "      <th>author_0</th>\n",
+              "      <th>author_1</th>\n",
+              "      <th>author_2</th>\n",
+              "      <th>author_3</th>\n",
+              "      <th>author_4</th>\n",
+              "      <th>author_5</th>\n",
+              "      <th>author_6</th>\n",
+              "      <th>expert_0</th>\n",
+              "      <th>expert_1</th>\n",
+              "      <th>expert_10</th>\n",
+              "      <th>...</th>\n",
+              "      <th>expert_52</th>\n",
+              "      <th>expert_53</th>\n",
+              "      <th>expert_54</th>\n",
+              "      <th>expert_55</th>\n",
+              "      <th>expert_56</th>\n",
+              "      <th>expert_57</th>\n",
+              "      <th>expert_6</th>\n",
+              "      <th>expert_7</th>\n",
+              "      <th>expert_8</th>\n",
+              "      <th>expert_9</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>question_id</th>\n",
+              "      <th>model_a</th>\n",
+              "      <th>model_b</th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"5\" valign=\"top\">81</th>\n",
+              "      <th rowspan=\"2\" valign=\"top\">alpaca-13b</th>\n",
+              "      <th>gpt-3.5-turbo</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>...</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>vicuna-13b-v1.2</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>...</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"2\" valign=\"top\">claude-v1</th>\n",
+              "      <th>alpaca-13b</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>...</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>llama-13b</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>...</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>gpt-3.5-turbo</th>\n",
+              "      <th>alpaca-13b</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>...</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>5 rows × 65 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-762e61fa-cb60-4006-91dc-c3ab9c74ce8e')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-762e61fa-cb60-4006-91dc-c3ab9c74ce8e button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-762e61fa-cb60-4006-91dc-c3ab9c74ce8e');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-4937200f-851d-4395-975b-7fff5e684d69\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-4937200f-851d-4395-975b-7fff5e684d69')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-4937200f-851d-4395-975b-7fff5e684d69 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "                                          winner_binary                    \\\n",
+              "judge                                          author_0 author_1 author_2   \n",
+              "question_id model_a       model_b                                           \n",
+              "81          alpaca-13b    gpt-3.5-turbo             NaN      NaN      1.0   \n",
+              "                          vicuna-13b-v1.2           NaN      NaN      NaN   \n",
+              "            claude-v1     alpaca-13b                NaN      NaN      NaN   \n",
+              "                          llama-13b                 NaN      NaN      NaN   \n",
+              "            gpt-3.5-turbo alpaca-13b                NaN      NaN      NaN   \n",
+              "\n",
+              "                                                                               \\\n",
+              "judge                                     author_3 author_4 author_5 author_6   \n",
+              "question_id model_a       model_b                                               \n",
+              "81          alpaca-13b    gpt-3.5-turbo        NaN      NaN      NaN      NaN   \n",
+              "                          vicuna-13b-v1.2      NaN      NaN      NaN      NaN   \n",
+              "            claude-v1     alpaca-13b           NaN      NaN      NaN      NaN   \n",
+              "                          llama-13b            NaN      NaN      NaN      NaN   \n",
+              "            gpt-3.5-turbo alpaca-13b           NaN      NaN      NaN      NaN   \n",
+              "\n",
+              "                                                                       ...  \\\n",
+              "judge                                     expert_0 expert_1 expert_10  ...   \n",
+              "question_id model_a       model_b                                      ...   \n",
+              "81          alpaca-13b    gpt-3.5-turbo        NaN      NaN       NaN  ...   \n",
+              "                          vicuna-13b-v1.2      1.0      NaN       NaN  ...   \n",
+              "            claude-v1     alpaca-13b           NaN      NaN       NaN  ...   \n",
+              "                          llama-13b            NaN      NaN       NaN  ...   \n",
+              "            gpt-3.5-turbo alpaca-13b           NaN      NaN       NaN  ...   \n",
+              "\n",
+              "                                                                         \\\n",
+              "judge                                     expert_52 expert_53 expert_54   \n",
+              "question_id model_a       model_b                                         \n",
+              "81          alpaca-13b    gpt-3.5-turbo         NaN       NaN       NaN   \n",
+              "                          vicuna-13b-v1.2       NaN       NaN       NaN   \n",
+              "            claude-v1     alpaca-13b            NaN       NaN       NaN   \n",
+              "                          llama-13b             NaN       NaN       NaN   \n",
+              "            gpt-3.5-turbo alpaca-13b            NaN       NaN       NaN   \n",
+              "\n",
+              "                                                                         \\\n",
+              "judge                                     expert_55 expert_56 expert_57   \n",
+              "question_id model_a       model_b                                         \n",
+              "81          alpaca-13b    gpt-3.5-turbo         NaN       NaN       NaN   \n",
+              "                          vicuna-13b-v1.2       NaN       NaN       NaN   \n",
+              "            claude-v1     alpaca-13b            NaN       NaN       NaN   \n",
+              "                          llama-13b             NaN       NaN       NaN   \n",
+              "            gpt-3.5-turbo alpaca-13b            NaN       NaN       NaN   \n",
+              "\n",
+              "                                                                               \n",
+              "judge                                     expert_6 expert_7 expert_8 expert_9  \n",
+              "question_id model_a       model_b                                              \n",
+              "81          alpaca-13b    gpt-3.5-turbo        NaN      NaN      NaN      NaN  \n",
+              "                          vicuna-13b-v1.2      NaN      NaN      NaN      NaN  \n",
+              "            claude-v1     alpaca-13b           NaN      NaN      NaN      NaN  \n",
+              "                          llama-13b            NaN      NaN      NaN      NaN  \n",
+              "            gpt-3.5-turbo alpaca-13b           NaN      NaN      NaN      NaN  \n",
+              "\n",
+              "[5 rows x 65 columns]"
+            ]
+          },
+          "execution_count": 13,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df_per_judge.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "Eqi-o-wlIynX",
+      "metadata": {
+        "id": "Eqi-o-wlIynX"
+      },
+      "source": [
+        "What is the distribution of the number of judges per conversation?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 14,
+      "id": "a3eefc71-18d9-4be1-bfe8-283557b2a9fa",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "a3eefc71-18d9-4be1-bfe8-283557b2a9fa",
+        "outputId": "1888f121-e3ab-4ebe-9307-2dfa5ec7e9ca"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "1    759\n",
+              "2    269\n",
+              "3     54\n",
+              "4      6\n",
+              "5      3\n",
+              "Name: count, dtype: int64"
+            ]
+          },
+          "execution_count": 14,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df_per_judge.count(axis=1).value_counts()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "UIEtvvGRI2Ur",
+      "metadata": {
+        "id": "UIEtvvGRI2Ur"
+      },
+      "source": [
+        "This is perfectly messy! The majority of conversations have only a single judge but several have 2, 3, 4 and even 5 judges!"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "6577b244-09c8-448e-8fce-3d09d508a039",
+      "metadata": {
+        "id": "6577b244-09c8-448e-8fce-3d09d508a039"
+      },
+      "source": [
+        "# Step 2: Creating a Quantitative ML Judge\n",
+        "\n",
+        "The CROWDLAB algorithm requires two inputs:\n",
+        "1. Judgements from human or AI evaluators. We already have those.\n",
+        "2. A quantitative model score. We'll use GPT-4 to construct that now!\n",
+        "\n",
+        "\n",
+        "Using the first row in the dataset, we'll build up a prompt and a function to recover model scores from GPT-4's token log-probabilities."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 15,
+      "id": "e80d8c92-f0b1-4ca0-873e-72c9704f8c4c",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "e80d8c92-f0b1-4ca0-873e-72c9704f8c4c",
+        "outputId": "d370a89d-4b3c-4604-c18d-b0842088127b"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "[{'content': 'Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.',\n",
+              "  'role': 'user'},\n",
+              " {'content': 'I recently had the pleasure of visiting Hawaii and it quickly became one of my favorite places. From the stunning beaches to the lush mountains, this place has it all. The people are incredibly friendly and the culture is alive and well. One of the highlights of my trip was visiting the Polynesian Cultural Center. Here, I was able to learn about the culture of the native Hawaiian people and try my hand at traditional crafts and activities. I also had a chance to explore some of the natural wonders of the island, including the breathtaking Hanauma Bay and the majestic Waimea Canyon. Whether you’re looking for a relaxing beach vacation or an adventure filled with culture and nature, Hawaii is the perfect destination.',\n",
+              "  'role': 'assistant'},\n",
+              " {'content': 'Rewrite your previous response. Start every sentence with the letter A.',\n",
+              "  'role': 'user'},\n",
+              " {'content': 'Aloha! I recently had the pleasure of visiting Hawaii and it quickly became one of my favorite places. From the stunning beaches to the lush mountains, this place has it all. The people are incredibly friendly and the culture is alive and well. One of the highlights of my trip was visiting the Polynesian Cultural Center. Here, I was able to learn about the culture of the native Hawaiian people and try my hand at traditional crafts and activities. I also had a chance to explore some of the natural wonders of the island, including the breathtaking Hanauma Bay and the majestic Waimea Canyon. Whether you’re looking for a relaxing beach vacation or an adventure filled with culture and nature, Hawaii is the perfect destination.',\n",
+              "  'role': 'assistant'}]"
+            ]
+          },
+          "execution_count": 15,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df['conversation_a'].iloc[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 16,
+      "id": "2e8fe0b3-7d0c-415b-a53e-f4ec1a5f4a8b",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2e8fe0b3-7d0c-415b-a53e-f4ec1a5f4a8b",
+        "outputId": "56370e91-c34b-4af2-c7ad-3a0ae8ee7b6c"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "[{'content': 'Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.',\n",
+              "  'role': 'user'},\n",
+              " {'content': \"Aloha! I recently had the pleasure of embarking on a trip to the beautiful island of Hawaii, and let me tell you, the cultural experiences and must-see attractions did not disappoint.\\n\\nFirst on my list was a visit to the Polynesian Cultural Center. This interactive experience immerses you in the cultures of the Pacific Islands, from the intricate dances of the Maori people of New Zealand to the fire knife dancing of Samoa. The center also features a canoe pageant, where different island nations showcase their unique styles of canoeing. It was truly a feast for the senses and a fascinating insight into the diverse cultures of the Pacific.\\n\\nNext up was a trip to the North Shore, which boasts some of the best surf spots in the world. I watched in awe as surfers of all levels tackled the massive waves, and even had the chance to take a lesson myself. It was an exhilarating experience, and I left with a newfound respect for the power of the ocean.\\n\\nOf course, no trip to Hawaii would be complete without a visit to Pearl Harbor. The somber memorial serves as a reminder of the sacrifices made during World War II, and it was a deeply moving experience to pay my respects to the brave men and women who lost their lives on that fateful day.\\n\\nLast but not least, I made sure to indulge in some of the local cuisine. From poke bowls to shave ice, the flavors of Hawaii are truly unique and delicious. A personal favorite was the plate lunch, which typically consists of a protein, rice, and macaroni salad. It may not be the healthiest option, but it sure is tasty!\\n\\nOverall, my trip to Hawaii was an unforgettable experience. The cultural immersion, natural beauty, and delicious food all contributed to an incredible adventure. If you're looking for a destination that has it all, Hawaii should definitely be at the top of your list. Mahalo for reading!\",\n",
+              "  'role': 'assistant'},\n",
+              " {'content': 'Rewrite your previous response. Start every sentence with the letter A.',\n",
+              "  'role': 'user'},\n",
+              " {'content': \"Absolutely! A recent trip to the beautiful island of Hawaii was an adventure that I will never forget.\\n\\nAs a helpful assistant, I highly recommend a visit to the Polynesian Cultural Center as it immerses you in the cultures of the Pacific Islands.\\n\\nAlso, a trip to the North Shore is a must-see attraction, where you can witness surfers of all levels tackling the massive waves.\\n\\nAdditionally, paying respects to the brave men and women who lost their lives at Pearl Harbor was a deeply moving experience.\\n\\nAnd let's not forget about the delicious local cuisine, from poke bowls to shave ice, the flavors of Hawaii are truly unique and tasty.\\n\\nAll in all, Hawaii has it all - cultural immersion, natural beauty, and delicious food. As an assistant, I encourage you to put Hawaii at the top of your travel list. Aloha!\",\n",
+              "  'role': 'assistant'}]"
+            ]
+          },
+          "execution_count": 16,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "example_convo = human_df['conversation_b'].iloc[0]\n",
+        "example_convo"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e67d2759-b0f2-4b5e-8377-02027cfe3cef",
+      "metadata": {
+        "id": "e67d2759-b0f2-4b5e-8377-02027cfe3cef"
+      },
+      "source": [
+        "We'll want to label the two different conversations to compare.\n",
+        "\n",
+        "I have an intuition (not yet tested, though it would be worth testing!) that consistently referring to the assistants as \"Assistant A\" and \"Assistant B\" will help the ML Judge differentiate the two conversations, so we'll restructure the conversation a bit:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 17,
+      "id": "ae520473-6597-4aa3-9726-235dfb987606",
+      "metadata": {
+        "id": "ae520473-6597-4aa3-9726-235dfb987606"
+      },
+      "outputs": [],
+      "source": [
+        "def conversation_to_text(conversation_obj_list, assistant_label):\n",
+        "    result_txt = \"\"\n",
+        "    for conv_obj in conversation_obj_list:\n",
+        "        result_txt += f\"{conv_obj['role'].upper()} {assistant_label.upper() if conv_obj['role'] == 'assistant' else ''}: {conv_obj['content']} \\n\"\n",
+        "    return result_txt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 18,
+      "id": "77d2db25-9bba-4577-8ff1-9844ce1a615e",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "77d2db25-9bba-4577-8ff1-9844ce1a615e",
+        "outputId": "767ae127-5f63-4d9a-cb20-e230af77a8b0"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "USER : Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions. \n",
+            "ASSISTANT B: Aloha! I recently had the pleasure of embarking on a trip to the beautiful island of Hawaii, and let me tell you, the cultural experiences and must-see attractions did not disappoint.\n",
+            "\n",
+            "First on my list was a visit to the Polynesian Cultural Center. This interactive experience immerses you in the cultures of the Pacific Islands, from the intricate dances of the Maori people of New Zealand to the fire knife dancing of Samoa. The center also features a canoe pageant, where different island nations showcase their unique styles of canoeing. It was truly a feast for the senses and a fascinating insight into the diverse cultures of the Pacific.\n",
+            "\n",
+            "Next up was a trip to the North Shore, which boasts some of the best surf spots in the world. I watched in awe as surfers of all levels tackled the massive waves, and even had the chance to take a lesson myself. It was an exhilarating experience, and I left with a newfound respect for the power of the ocean.\n",
+            "\n",
+            "Of course, no trip to Hawaii would be complete without a visit to Pearl Harbor. The somber memorial serves as a reminder of the sacrifices made during World War II, and it was a deeply moving experience to pay my respects to the brave men and women who lost their lives on that fateful day.\n",
+            "\n",
+            "Last but not least, I made sure to indulge in some of the local cuisine. From poke bowls to shave ice, the flavors of Hawaii are truly unique and delicious. A personal favorite was the plate lunch, which typically consists of a protein, rice, and macaroni salad. It may not be the healthiest option, but it sure is tasty!\n",
+            "\n",
+            "Overall, my trip to Hawaii was an unforgettable experience. The cultural immersion, natural beauty, and delicious food all contributed to an incredible adventure. If you're looking for a destination that has it all, Hawaii should definitely be at the top of your list. Mahalo for reading! \n",
+            "USER : Rewrite your previous response. Start every sentence with the letter A. \n",
+            "ASSISTANT B: Absolutely! A recent trip to the beautiful island of Hawaii was an adventure that I will never forget.\n",
+            "\n",
+            "As a helpful assistant, I highly recommend a visit to the Polynesian Cultural Center as it immerses you in the cultures of the Pacific Islands.\n",
+            "\n",
+            "Also, a trip to the North Shore is a must-see attraction, where you can witness surfers of all levels tackling the massive waves.\n",
+            "\n",
+            "Additionally, paying respects to the brave men and women who lost their lives at Pearl Harbor was a deeply moving experience.\n",
+            "\n",
+            "And let's not forget about the delicious local cuisine, from poke bowls to shave ice, the flavors of Hawaii are truly unique and tasty.\n",
+            "\n",
+            "All in all, Hawaii has it all - cultural immersion, natural beauty, and delicious food. As an assistant, I encourage you to put Hawaii at the top of your travel list. Aloha! \n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "print(conversation_to_text(example_convo, 'b'))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fwOt7adDKeCv",
+      "metadata": {
+        "id": "fwOt7adDKeCv"
+      },
+      "source": [
+        "That looks good, so we'll produce our entire prompt function now:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 19,
+      "id": "8d0ad88c-c50a-4468-b319-085fe82da6ee",
+      "metadata": {
+        "id": "8d0ad88c-c50a-4468-b319-085fe82da6ee"
+      },
+      "outputs": [],
+      "source": [
+        "def produce_prompt_for_llm_evaluation(conversation_a, conversation_b):\n",
+        "    prompt_preamble = f\"\"\"\n",
+        "    You are a logical and accurate converation reading and grading AI system.\n",
+        "    You will be shown two conversations between USER and ASSISTANT.\n",
+        "    Read each conversation carefully and decide which one better complies with the USER's instructions\n",
+        "    Please output ONLY \"A\" if the ASSISTANT in conversation A better complies with the USER's demands, and output only \"B\" if the ASSISTANT\n",
+        "    in conversation B better complies with the USER's demands\n",
+        "\n",
+        "    <Conversation A>\n",
+        "    {conversation_to_text(conversation_a, \"a\")}\n",
+        "    </Conversation A>\n",
+        "\n",
+        "    That was conversation A, here is conversation B:\n",
+        "\n",
+        "    <Conversation B>\n",
+        "    {conversation_to_text(conversation_b, \"b\")}\n",
+        "    </Conversation B>\n",
+        "\n",
+        "    Please respond with \"A\" if Assistant A was better and \"B\" if Assistant B was better. ONLY RETURN \"A\" OR \"B\"\n",
+        "    \"\"\"\n",
+        "    return prompt_preamble\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 20,
+      "id": "ff83929c-983f-47a9-949f-f6c4f2f760e3",
+      "metadata": {
+        "id": "ff83929c-983f-47a9-949f-f6c4f2f760e3"
+      },
+      "outputs": [],
+      "source": [
+        "example_prompt = produce_prompt_for_llm_evaluation(human_df['conversation_a'].iloc[0], human_df['conversation_b'].iloc[0])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 21,
+      "id": "186b94af-bfc7-4695-9fda-6f200c198e19",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "186b94af-bfc7-4695-9fda-6f200c198e19",
+        "outputId": "b019a677-cbef-4ac4-e837-1f19e4d946c8"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "    You are a logical and accurate converation reading and grading AI system.\n",
+            "    You will be shown two conversations between USER and ASSISTANT.\n",
+            "    Read each conversation carefully and decide which one better complies with the USER's instructions\n",
+            "    Please output ONLY \"A\" if the ASSISTANT in conversation A better complies with the USER's demands, and output only \"B\" if the ASSISTANT\n",
+            "    in conversation B better complies with the USER's demands\n",
+            "\n",
+            "    <Conversation A>\n",
+            "    USER : Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions. \n",
+            "ASSISTANT A: I recently had the pleasure of visiting Hawaii and it quickly became one of my favorite places. From the stunning beaches to the lush mountains, this place has it all. The people are incredibly friendly and the culture is alive and well. One of the highlights of my trip was visiting the Polynesian Cultural Center. Here, I was able to learn about the culture of the native Hawaiian people and try my hand at traditional crafts and activities. I also had a chance to explore some of the natural wonders of the island, including the breathtaking Hanauma Bay and the majestic Waimea Canyon. Whether you’re looking for a relaxing beach vacation or an adventure filled with culture and nature, Hawaii is the perfect destination. \n",
+            "USER : Rewrite your previous response. Start every sentence with the letter A. \n",
+            "ASSISTANT A: Aloha! I recently had the pleasure of visiting Hawaii and it quickly became one of my favorite places. From the stunning beaches to the lush mountains, this place has it all. The people are incredibly friendly and the culture is alive and well. One of the highlights of my trip was visiting the Polynesian Cultural Center. Here, I was able to learn about the culture of the native Hawaiian people and try my hand at traditional crafts and activities. I also had a chance to explore some of the natural wonders of the island, including the breathtaking Hanauma Bay and the majestic Waimea Canyon. Whether you’re looking for a relaxing beach vacation or an adventure filled with culture and nature, Hawaii is the perfect destination. \n",
+            "\n",
+            "    </Conversation A>\n",
+            "\n",
+            "    That was conversation A, here is conversation B:\n",
+            "\n",
+            "    <Conversation B>\n",
+            "    USER : Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions. \n",
+            "ASSISTANT B: Aloha! I recently had the pleasure of embarking on a trip to the beautiful island of Hawaii, and let me tell you, the cultural experiences and must-see attractions did not disappoint.\n",
+            "\n",
+            "First on my list was a visit to the Polynesian Cultural Center. This interactive experience immerses you in the cultures of the Pacific Islands, from the intricate dances of the Maori people of New Zealand to the fire knife dancing of Samoa. The center also features a canoe pageant, where different island nations showcase their unique styles of canoeing. It was truly a feast for the senses and a fascinating insight into the diverse cultures of the Pacific.\n",
+            "\n",
+            "Next up was a trip to the North Shore, which boasts some of the best surf spots in the world. I watched in awe as surfers of all levels tackled the massive waves, and even had the chance to take a lesson myself. It was an exhilarating experience, and I left with a newfound respect for the power of the ocean.\n",
+            "\n",
+            "Of course, no trip to Hawaii would be complete without a visit to Pearl Harbor. The somber memorial serves as a reminder of the sacrifices made during World War II, and it was a deeply moving experience to pay my respects to the brave men and women who lost their lives on that fateful day.\n",
+            "\n",
+            "Last but not least, I made sure to indulge in some of the local cuisine. From poke bowls to shave ice, the flavors of Hawaii are truly unique and delicious. A personal favorite was the plate lunch, which typically consists of a protein, rice, and macaroni salad. It may not be the healthiest option, but it sure is tasty!\n",
+            "\n",
+            "Overall, my trip to Hawaii was an unforgettable experience. The cultural immersion, natural beauty, and delicious food all contributed to an incredible adventure. If you're looking for a destination that has it all, Hawaii should definitely be at the top of your list. Mahalo for reading! \n",
+            "USER : Rewrite your previous response. Start every sentence with the letter A. \n",
+            "ASSISTANT B: Absolutely! A recent trip to the beautiful island of Hawaii was an adventure that I will never forget.\n",
+            "\n",
+            "As a helpful assistant, I highly recommend a visit to the Polynesian Cultural Center as it immerses you in the cultures of the Pacific Islands.\n",
+            "\n",
+            "Also, a trip to the North Shore is a must-see attraction, where you can witness surfers of all levels tackling the massive waves.\n",
+            "\n",
+            "Additionally, paying respects to the brave men and women who lost their lives at Pearl Harbor was a deeply moving experience.\n",
+            "\n",
+            "And let's not forget about the delicious local cuisine, from poke bowls to shave ice, the flavors of Hawaii are truly unique and tasty.\n",
+            "\n",
+            "All in all, Hawaii has it all - cultural immersion, natural beauty, and delicious food. As an assistant, I encourage you to put Hawaii at the top of your travel list. Aloha! \n",
+            "\n",
+            "    </Conversation B>\n",
+            "\n",
+            "    Please respond with \"A\" if Assistant A was better and \"B\" if Assistant B was better. ONLY RETURN \"A\" OR \"B\"\n",
+            "    \n"
+          ]
+        }
+      ],
+      "source": [
+        "print(example_prompt)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "76d05aad-fae6-45fa-9a1d-8c551cdaac6a",
+      "metadata": {
+        "id": "76d05aad-fae6-45fa-9a1d-8c551cdaac6a"
+      },
+      "source": [
+        "# Step 3: Using the ML System to judge conversations!\n",
+        "\n",
+        "Now that we have prompts, we need to call the LLM and obtain our probability estimates."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 23,
+      "id": "wMuv-OY8wgSQ",
+      "metadata": {
+        "id": "wMuv-OY8wgSQ"
+      },
+      "outputs": [],
+      "source": [
+        "# Ensure you have OPENAI_API_KEY defined in your environment variables, or pass it as the api_key argument to the OpenAI() call:\n",
+        "from openai import OpenAI\n",
+        "\n",
+        "openai_client = OpenAI()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 24,
+      "id": "bb70ff14-1d6a-4665-810d-0d8563cab527",
+      "metadata": {
+        "id": "bb70ff14-1d6a-4665-810d-0d8563cab527"
+      },
+      "outputs": [],
+      "source": [
+        "def get_completion(\n",
+        "    messages: list[dict[str, str]],\n",
+        "    model: str = \"gpt-4\",\n",
+        "    max_tokens=10,\n",
+        "    temperature=0,\n",
+        "    stop=None,\n",
+        "    seed=123,\n",
+        "    tools=None,\n",
+        "    logprobs=None,  # whether to return the log probabilities of the output tokens\n",
+        "    top_logprobs=None,\n",
+        "    client=openai_client,\n",
+        "):\n",
+        "    \"\"\"Send a chat-completion request and return the raw completion object.\n",
+        "\n",
+        "    Args:\n",
+        "        messages: chat messages, each a dict with string keys and values.\n",
+        "        model: name of the model to query.\n",
+        "        max_tokens, temperature, stop, seed: forwarded unchanged to the API call.\n",
+        "        tools: optional tool definitions; only included in the request when truthy.\n",
+        "        logprobs: forwarded unchanged; asks for log probabilities of the output tokens.\n",
+        "        top_logprobs: forwarded unchanged to the API call.\n",
+        "        client: object exposing `chat.completions.create`; defaults to the\n",
+        "            `openai_client` created in the setup cell above.\n",
+        "\n",
+        "    Returns:\n",
+        "        Whatever `client.chat.completions.create` returns (the full completion\n",
+        "        object, not just the message text).\n",
+        "    \"\"\"\n",
+        "    params = {\n",
+        "        \"model\": model,\n",
+        "        \"messages\": messages,\n",
+        "        \"max_tokens\": max_tokens,\n",
+        "        \"temperature\": temperature,\n",
+        "        \"stop\": stop,\n",
+        "        \"seed\": seed,\n",
+        "        \"logprobs\": logprobs,\n",
+        "        \"top_logprobs\": top_logprobs,\n",
+        "    }\n",
+        "    # Only send `tools` when the caller actually supplied some.\n",
+        "    if tools:\n",
+        "        params[\"tools\"] = tools\n",
+        "\n",
+        "    # Use the injected client; previously this referenced an undefined global `client`.\n",
+        "    completion = client.chat.completions.create(**params)\n",
+        "    return completion"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 46,
+      "id": "be6276b5-af92-4bdb-92ee-abdea7d8c7ae",
+      "metadata": {
+        "id": "be6276b5-af92-4bdb-92ee-abdea7d8c7ae"
+      },
+      "outputs": [],
+      "source": [
+        "def get_prob_scores(prompt: str, model_name: str, top_logprobs: int, tools=None, client=openai_client, **kwargs):\n",
+        "    \"\"\"Return the probability of each candidate first token of the model's reply to `prompt`.\n",
+        "\n",
+        "    Sends `prompt` as a single user message with temperature=0 and logprobs enabled, then\n",
+        "    converts the top alternatives reported for the first output token from log\n",
+        "    probabilities into probabilities rounded to 2 decimals.\n",
+        "\n",
+        "    Args:\n",
+        "        prompt: text sent as the only user message.\n",
+        "        model_name: model identifier passed to the API.\n",
+        "        top_logprobs: how many alternative tokens to request per position.\n",
+        "        tools: optional tool definitions; only included in the request when truthy.\n",
+        "        client: object exposing `chat.completions.create` (defaults to `openai_client`).\n",
+        "        **kwargs: extra keyword arguments forwarded to `chat.completions.create`.\n",
+        "\n",
+        "    Returns:\n",
+        "        dict mapping token string -> probability rounded to 2 decimals.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: if the response carries no token log probabilities.\n",
+        "    \"\"\"\n",
+        "    request = {\n",
+        "        \"model\": model_name,\n",
+        "        \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
+        "        \"temperature\": 0,\n",
+        "        \"logprobs\": True,\n",
+        "        \"top_logprobs\": top_logprobs,\n",
+        "        \"stop\": None,\n",
+        "    }\n",
+        "    # Same convention as get_completion: only send `tools` when some were supplied.\n",
+        "    if tools:\n",
+        "        request[\"tools\"] = tools\n",
+        "\n",
+        "    completion = client.chat.completions.create(**request, **kwargs)\n",
+        "\n",
+        "    token_logprobs = completion.choices[0].logprobs\n",
+        "    if token_logprobs is None or not token_logprobs.content:\n",
+        "        raise ValueError(\"The completion contains no token log probabilities to convert.\")\n",
+        "\n",
+        "    # Only the first generated token is inspected; its alternatives are the candidate answers.\n",
+        "    first_token = token_logprobs.content[0]\n",
+        "    prob_dict = {tlp.token: np.round(np.exp(tlp.logprob), 2) for tlp in first_token.top_logprobs}\n",
+        "    return prob_dict\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 40,
+      "id": "Qi0AD4iYypFg",
+      "metadata": {
+        "id": "Qi0AD4iYypFg"
+      },
+      "outputs": [],
+      "source": [
+        "# Quick check of get_prob_scores: ask for a single letter and request the top-2 first-token alternatives.\n",
+        "prompt = \"Please return ONLY one of the following two letters A or B:\"\n",
+        "\n",
+        "res = get_prob_scores(\n",
+        "    prompt=prompt,\n",
+        "    model_name=\"gpt-4o\",\n",
+        "    top_logprobs=2,\n",
+        "    client=openai_client,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 41,
+      "id": "nwsoeo1fzkCH",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "nwsoeo1fzkCH",
+        "outputId": "ee5e38bd-6a41-45f5-8878-298f9b8148f2"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "{'A': 0.73, 'B': 0.27}"
+            ]
+          },
+          "execution_count": 41,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "res"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 58,
+      "id": "e6pmG8TJGi1q",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "e6pmG8TJGi1q",
+        "outputId": "d4a14df8-dd2f-429d-ac4c-cf999b1ff07c"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"human_df\",\n  \"rows\": 3355,\n  \"fields\": [\n    {\n      \"column\": \"question_id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 23,\n        \"min\": 81,\n        \"max\": 160,\n        \"num_unique_values\": 80,\n        \"samples\": [\n          111,\n          81,\n          103\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_a\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"alpaca-13b\",\n          \"claude-v1\",\n          \"llama-13b\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_b\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"gpt-3.5-turbo\",\n          \"vicuna-13b-v1.2\",\n          \"gpt-4\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"winner\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"model_b\",\n          \"tie\",\n          \"model_a\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"judge\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 65,\n        \"samples\": [\n          \"expert_39\",\n          \"expert_53\",\n          \"author_2\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_a\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_b\",\n   
   \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"turn\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 2,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          2,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"winner_binary\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.5000925525945396,\n        \"min\": 0.0,\n        \"max\": 1.0,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0.0,\n          1.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe",
+              "variable_name": "human_df"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-66fd294f-4e45-4be9-b7eb-c4efe8d27c67\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>question_id</th>\n",
+              "      <th>model_a</th>\n",
+              "      <th>model_b</th>\n",
+              "      <th>winner</th>\n",
+              "      <th>judge</th>\n",
+              "      <th>conversation_a</th>\n",
+              "      <th>conversation_b</th>\n",
+              "      <th>turn</th>\n",
+              "      <th>winner_binary</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>author_2</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>author_2</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>2</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_17</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_17</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>2</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>vicuna-13b-v1.2</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_0</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-66fd294f-4e45-4be9-b7eb-c4efe8d27c67')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-66fd294f-4e45-4be9-b7eb-c4efe8d27c67 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-66fd294f-4e45-4be9-b7eb-c4efe8d27c67');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-b89c14e4-49a3-45c7-b98a-59c52d712614\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-b89c14e4-49a3-45c7-b98a-59c52d712614')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-b89c14e4-49a3-45c7-b98a-59c52d712614 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "   question_id     model_a          model_b   winner      judge  \\\n",
+              "0           81  alpaca-13b    gpt-3.5-turbo  model_b   author_2   \n",
+              "1           81  alpaca-13b    gpt-3.5-turbo  model_b   author_2   \n",
+              "2           81  alpaca-13b    gpt-3.5-turbo  model_b  expert_17   \n",
+              "3           81  alpaca-13b    gpt-3.5-turbo  model_b  expert_17   \n",
+              "4           81  alpaca-13b  vicuna-13b-v1.2  model_b   expert_0   \n",
+              "\n",
+              "                                      conversation_a  \\\n",
+              "0  [{'content': 'Compose an engaging travel blog ...   \n",
+              "1  [{'content': 'Compose an engaging travel blog ...   \n",
+              "2  [{'content': 'Compose an engaging travel blog ...   \n",
+              "3  [{'content': 'Compose an engaging travel blog ...   \n",
+              "4  [{'content': 'Compose an engaging travel blog ...   \n",
+              "\n",
+              "                                      conversation_b  turn  winner_binary  \n",
+              "0  [{'content': 'Compose an engaging travel blog ...     1            1.0  \n",
+              "1  [{'content': 'Compose an engaging travel blog ...     2            1.0  \n",
+              "2  [{'content': 'Compose an engaging travel blog ...     1            1.0  \n",
+              "3  [{'content': 'Compose an engaging travel blog ...     2            1.0  \n",
+              "4  [{'content': 'Compose an engaging travel blog ...     1            1.0  "
+            ]
+          },
+          "execution_count": 58,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "human_df.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 61,
+      "id": "3084c421-6914-4648-a453-00f6b76d79ad",
+      "metadata": {
+        "id": "3084c421-6914-4648-a453-00f6b76d79ad"
+      },
+      "outputs": [],
+      "source": [
+        "for_llm_df = human_df.drop_duplicates(subset=['question_id', 'model_a', 'model_b'])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 62,
+      "id": "c05bc567-59ad-49c4-bfba-f3cabd11d924",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "c05bc567-59ad-49c4-bfba-f3cabd11d924",
+        "outputId": "11b5f68e-16e4-49a8-9ffa-bf8da40cf9ca"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"for_llm_df\",\n  \"rows\": 1204,\n  \"fields\": [\n    {\n      \"column\": \"question_id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 23,\n        \"min\": 81,\n        \"max\": 160,\n        \"num_unique_values\": 80,\n        \"samples\": [\n          111,\n          81,\n          103\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_a\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"alpaca-13b\",\n          \"claude-v1\",\n          \"llama-13b\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_b\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"gpt-3.5-turbo\",\n          \"vicuna-13b-v1.2\",\n          \"gpt-4\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"winner\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"model_b\",\n          \"model_a\",\n          \"tie\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"judge\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 64,\n        \"samples\": [\n          \"expert_39\",\n          \"expert_41\",\n          \"author_2\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_a\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_b\",\n 
     \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"turn\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 1,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"winner_binary\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.5000995714679379,\n        \"min\": 0.0,\n        \"max\": 1.0,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe",
+              "variable_name": "for_llm_df"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-79230214-03ac-4efd-898a-0971a4d9db9a\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>question_id</th>\n",
+              "      <th>model_a</th>\n",
+              "      <th>model_b</th>\n",
+              "      <th>winner</th>\n",
+              "      <th>judge</th>\n",
+              "      <th>conversation_a</th>\n",
+              "      <th>conversation_b</th>\n",
+              "      <th>turn</th>\n",
+              "      <th>winner_binary</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>author_2</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>vicuna-13b-v1.2</td>\n",
+              "      <td>model_b</td>\n",
+              "      <td>expert_0</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>81</td>\n",
+              "      <td>claude-v1</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>model_a</td>\n",
+              "      <td>expert_37</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>81</td>\n",
+              "      <td>claude-v1</td>\n",
+              "      <td>llama-13b</td>\n",
+              "      <td>model_a</td>\n",
+              "      <td>expert_27</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>10</th>\n",
+              "      <td>81</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>model_a</td>\n",
+              "      <td>expert_24</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>1</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-79230214-03ac-4efd-898a-0971a4d9db9a')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-79230214-03ac-4efd-898a-0971a4d9db9a button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-79230214-03ac-4efd-898a-0971a4d9db9a');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-fa615dbf-58d6-44cd-9217-6a96c851e358\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-fa615dbf-58d6-44cd-9217-6a96c851e358')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-fa615dbf-58d6-44cd-9217-6a96c851e358 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "    question_id        model_a          model_b   winner      judge  \\\n",
+              "0            81     alpaca-13b    gpt-3.5-turbo  model_b   author_2   \n",
+              "4            81     alpaca-13b  vicuna-13b-v1.2  model_b   expert_0   \n",
+              "6            81      claude-v1       alpaca-13b  model_a  expert_37   \n",
+              "8            81      claude-v1        llama-13b  model_a  expert_27   \n",
+              "10           81  gpt-3.5-turbo       alpaca-13b  model_a  expert_24   \n",
+              "\n",
+              "                                       conversation_a  \\\n",
+              "0   [{'content': 'Compose an engaging travel blog ...   \n",
+              "4   [{'content': 'Compose an engaging travel blog ...   \n",
+              "6   [{'content': 'Compose an engaging travel blog ...   \n",
+              "8   [{'content': 'Compose an engaging travel blog ...   \n",
+              "10  [{'content': 'Compose an engaging travel blog ...   \n",
+              "\n",
+              "                                       conversation_b  turn  winner_binary  \n",
+              "0   [{'content': 'Compose an engaging travel blog ...     1            1.0  \n",
+              "4   [{'content': 'Compose an engaging travel blog ...     1            1.0  \n",
+              "6   [{'content': 'Compose an engaging travel blog ...     1            0.0  \n",
+              "8   [{'content': 'Compose an engaging travel blog ...     1            0.0  \n",
+              "10  [{'content': 'Compose an engaging travel blog ...     1            0.0  "
+            ]
+          },
+          "execution_count": 62,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "for_llm_df.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "646a7737-0b7e-4dce-9c3a-f32ae030cd5a",
+      "metadata": {
+        "id": "646a7737-0b7e-4dce-9c3a-f32ae030cd5a"
+      },
+      "source": [
+        "Let's build the evaluation prompt for every row, inspect the first one, and then try the scoring function on a single example!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 51,
+      "id": "Oh5ytNZCbE4V",
+      "metadata": {
+        "id": "Oh5ytNZCbE4V"
+      },
+      "outputs": [],
+      "source": [
+        "for_llm_df['conversation_prompt_text'] = human_df.apply(\n",
+        "    lambda s: produce_prompt_for_llm_evaluation(s['conversation_a'], s['conversation_b']),\n",
+        "    axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 52,
+      "id": "DG1jbm29bH6r",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 105
+        },
+        "id": "DG1jbm29bH6r",
+        "outputId": "0159a87e-ef27-4b3e-dd45-fa509a2fd226"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'\\n    You are a logical and accurate converation reading and grading AI system.\\n    You will be shown two conversations between USER and ASSISTANT.\\n    Read each conversation carefully and decide which one better complies with the USER\\'s instructions\\n    Please output ONLY \"A\" if the ASSISTANT in conversation A better complies with the USER\\'s demands, and output only \"B\" if the ASSISTANT\\n    in conversation B better complies with the USER\\'s demands\\n\\n    <Conversation A>\\n    USER : Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions. \\nASSISTANT A: I recently had the pleasure of visiting Hawaii and it quickly became one of my favorite places. From the stunning beaches to the lush mountains, this place has it all. The people are incredibly friendly and the culture is alive and well. One of the highlights of my trip was visiting the Polynesian Cultural Center. Here, I was able to learn about the culture of the native Hawaiian people and try my hand at traditional crafts and activities. I also had a chance to explore some of the natural wonders of the island, including the breathtaking Hanauma Bay and the majestic Waimea Canyon. Whether you’re looking for a relaxing beach vacation or an adventure filled with culture and nature, Hawaii is the perfect destination. \\nUSER : Rewrite your previous response. Start every sentence with the letter A. \\nASSISTANT A: Aloha! I recently had the pleasure of visiting Hawaii and it quickly became one of my favorite places. From the stunning beaches to the lush mountains, this place has it all. The people are incredibly friendly and the culture is alive and well. One of the highlights of my trip was visiting the Polynesian Cultural Center. Here, I was able to learn about the culture of the native Hawaiian people and try my hand at traditional crafts and activities. 
I also had a chance to explore some of the natural wonders of the island, including the breathtaking Hanauma Bay and the majestic Waimea Canyon. Whether you’re looking for a relaxing beach vacation or an adventure filled with culture and nature, Hawaii is the perfect destination. \\n\\n    </Conversation A>\\n\\n    That was conversation A, here is conversation B:\\n\\n    <Conversation B>\\n    USER : Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions. \\nASSISTANT B: Aloha! I recently had the pleasure of embarking on a trip to the beautiful island of Hawaii, and let me tell you, the cultural experiences and must-see attractions did not disappoint.\\n\\nFirst on my list was a visit to the Polynesian Cultural Center. This interactive experience immerses you in the cultures of the Pacific Islands, from the intricate dances of the Maori people of New Zealand to the fire knife dancing of Samoa. The center also features a canoe pageant, where different island nations showcase their unique styles of canoeing. It was truly a feast for the senses and a fascinating insight into the diverse cultures of the Pacific.\\n\\nNext up was a trip to the North Shore, which boasts some of the best surf spots in the world. I watched in awe as surfers of all levels tackled the massive waves, and even had the chance to take a lesson myself. It was an exhilarating experience, and I left with a newfound respect for the power of the ocean.\\n\\nOf course, no trip to Hawaii would be complete without a visit to Pearl Harbor. The somber memorial serves as a reminder of the sacrifices made during World War II, and it was a deeply moving experience to pay my respects to the brave men and women who lost their lives on that fateful day.\\n\\nLast but not least, I made sure to indulge in some of the local cuisine. From poke bowls to shave ice, the flavors of Hawaii are truly unique and delicious. 
A personal favorite was the plate lunch, which typically consists of a protein, rice, and macaroni salad. It may not be the healthiest option, but it sure is tasty!\\n\\nOverall, my trip to Hawaii was an unforgettable experience. The cultural immersion, natural beauty, and delicious food all contributed to an incredible adventure. If you\\'re looking for a destination that has it all, Hawaii should definitely be at the top of your list. Mahalo for reading! \\nUSER : Rewrite your previous response. Start every sentence with the letter A. \\nASSISTANT B: Absolutely! A recent trip to the beautiful island of Hawaii was an adventure that I will never forget.\\n\\nAs a helpful assistant, I highly recommend a visit to the Polynesian Cultural Center as it immerses you in the cultures of the Pacific Islands.\\n\\nAlso, a trip to the North Shore is a must-see attraction, where you can witness surfers of all levels tackling the massive waves.\\n\\nAdditionally, paying respects to the brave men and women who lost their lives at Pearl Harbor was a deeply moving experience.\\n\\nAnd let\\'s not forget about the delicious local cuisine, from poke bowls to shave ice, the flavors of Hawaii are truly unique and tasty.\\n\\nAll in all, Hawaii has it all - cultural immersion, natural beauty, and delicious food. As an assistant, I encourage you to put Hawaii at the top of your travel list. Aloha! \\n\\n    </Conversation B>\\n\\n    Please respond with \"A\" if Assistant A was better and \"B\" if Assistant B was better. ONLY RETURN \"A\" OR \"B\"\\n    '"
+            ]
+          },
+          "execution_count": 52,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "for_llm_df.iloc[0]['conversation_prompt_text']"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 53,
+      "id": "6374e371-1fc6-45b4-956a-e3dc568b3a7a",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6374e371-1fc6-45b4-956a-e3dc568b3a7a",
+        "outputId": "f40d6aac-0722-4bb1-d009-a0c56a565280"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "{'A': 0.99, 'B': 0.01}"
+            ]
+          },
+          "execution_count": 53,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "get_prob_scores(client=openai_client, prompt=for_llm_df.iloc[1]['conversation_prompt_text'], model_name=\"gpt-4o\", top_logprobs=2, max_tokens=3)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "df88c847-6d95-40ed-a0c4-a858418894eb",
+      "metadata": {
+        "id": "df88c847-6d95-40ed-a0c4-a858418894eb"
+      },
+      "source": [
+        "## Scoring our conversations\n",
+        "\n",
+        "The next cell sends every prompt to GPT-4o, which incurs API charges, so run it only once (the cell after it saves the scored results to `mt-bench-scored.pkl`):"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 54,
+      "id": "aee5123c-ee2a-4320-baa7-79062d519020",
+      "metadata": {
+        "id": "aee5123c-ee2a-4320-baa7-79062d519020"
+      },
+      "outputs": [],
+      "source": [
+        "for_llm_df['score_results'] = for_llm_df['conversation_prompt_text'].apply(lambda s: get_prob_scores(prompt=s, client=openai_client, model_name=\"gpt-4o\", max_tokens=10, top_logprobs=2))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 55,
+      "id": "u6NBdzu9FKZH",
+      "metadata": {
+        "id": "u6NBdzu9FKZH"
+      },
+      "outputs": [],
+      "source": [
+        "for_llm_df.to_pickle(\"mt-bench-scored.pkl\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 57,
+      "id": "ynmf7IN_GE2S",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "ynmf7IN_GE2S",
+        "outputId": "0709cebb-d0fc-48a1-9921-ff03e6bda7df"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"for_llm_df\",\n  \"rows\": 1204,\n  \"fields\": [\n    {\n      \"column\": \"question_id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 23,\n        \"min\": 81,\n        \"max\": 160,\n        \"num_unique_values\": 80,\n        \"samples\": [\n          111,\n          81,\n          103\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_a\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"alpaca-13b\",\n          \"claude-v1\",\n          \"llama-13b\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"model_b\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"gpt-3.5-turbo\",\n          \"vicuna-13b-v1.2\",\n          \"gpt-4\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_a\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_b\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"conversation_prompt_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1198,\n        \"samples\": [\n          \"\\n    You are a logical and accurate converation reading and grading AI system.\\n    You will be shown two conversations between USER and ASSISTANT.\\n    Read each conversation carefully and decide which one better complies with the USER's instructions\\n    Please output ONLY \\\"A\\\" if the ASSISTANT in 
conversation A better complies with the USER's demands, and output only \\\"B\\\" if the ASSISTANT\\n    in conversation B better complies with the USER's demands\\n\\n    <Conversation A>\\n    USER : What are some business etiquette norms when doing business in Japan? \\nASSISTANT A: When doing business in Japan, it is essential to be aware of and adhere to the local business etiquette norms. Here are some key points to keep in mind:\\n\\n1. Punctuality: Arriving on time or even a few minutes early is crucial in Japan. Being late is considered disrespectful and can harm your business relationships.\\n\\n2. Business cards (Meishi): The exchange of business cards is an important ritual in Japan. When presenting your card, hold it with both hands and ensure that the text is facing the recipient. When receiving a card, accept it with both hands, read it carefully, and place it in a cardholder or on the table in front of you. Do not write on or stuff the card into your pocket.\\n\\n3. Bowing: Bowing is a common form of greeting and showing respect in Japan. The depth and duration of the bow depend on the level of respect you wish to convey. In a business setting, a slight bow (15-30 degrees) is usually appropriate.\\n\\n4. Addressing others: Use last names with the honorific \\\"san\\\" (e.g., Tanaka-san) when addressing Japanese colleagues or business partners. Avoid using first names unless you have been invited to do so.\\n\\n5. Gift-giving: Exchanging gifts is a common practice in Japanese business culture. Bring a small, high-quality gift from your home country to present to your Japanese counterparts. Gifts should be wrapped and presented with both hands. Do not be surprised if the recipient does not open the gift immediately, as it is customary to open gifts later in private.\\n\\n6. Dress code: Dress conservatively and professionally in business settings. 
Dark-colored suits, white shirts, and conservative ties are standard for men, while women should wear modest dresses or suits.\\n\\n7. Meetings: Meetings in Japan are often formal and structured. Prepare well in advance, and provide any materials or presentations in both English and Japanese if possible. Avoid interrupting or speaking out of turn, as this can be seen as disrespectful.\\n\\n8. Decision-making: Japanese business culture values consensus and group harmony. Decision-making can be slow and may involve multiple layers of approval. Be patient and avoid pushing for immediate decisions.\\n\\n9. Non-verbal communication: Pay attention to non-verbal cues, as Japanese people often communicate indirectly. Silence can be a sign of respect or contemplation, so do not feel the need to fill every pause in conversation.\\n\\n10. Socializing: Building relationships is essential in Japanese business culture. Be prepared to attend after-work social events, such as dinners or drinks, as these are important opportunities to strengthen connections with your Japanese counterparts. However, remember to maintain a level of professionalism and avoid discussing business during these events unless your Japanese colleagues initiate the conversation. \\nUSER : Create a video script for training new employees of a car wash business in Japan. Highlight the above etiquette norms. \\nASSISTANT A: Title: Japanese Business Etiquette for New Employees at [Car Wash Company Name]\\n\\nINTRODUCTION\\n\\n[Host]: Welcome to [Car Wash Company Name]! As a new employee, it's essential to understand the importance of Japanese business etiquette in our daily operations. In this training video, we will highlight key etiquette norms to help you succeed in our company and provide exceptional service to our customers.\\n\\nSCENE 1: PUNCTUALITY\\n\\n[Host]: Punctuality is highly valued in Japan. Always arrive on time or a few minutes early for your shift, meetings, and training sessions. 
Being late is considered disrespectful and can harm your relationships with colleagues and customers.\\n\\nSCENE 2: GREETINGS AND BOWING\\n\\n[Host]: When greeting customers and colleagues, a slight bow (15-30 degrees) is appropriate. Bowing is a sign of respect and an essential part of Japanese culture. Remember to maintain eye contact and smile when greeting others.\\n\\nSCENE 3: ADDRESSING OTHERS\\n\\n[Host]: Use last names with the honorific \\\"san\\\" when addressing your colleagues or customers. For example, if a customer's last name is Tanaka, address them as Tanaka-san. Avoid using first names unless you have been invited to do so.\\n\\nSCENE 4: DRESS CODE\\n\\n[Host]: At [Car Wash Company Name], we expect our employees to dress professionally and conservatively. Uniforms should be clean and well-maintained. Personal grooming is also essential, as it reflects our company's image and commitment to quality service.\\n\\nSCENE 5: TEAMWORK AND COMMUNICATION\\n\\n[Host]: Japanese business culture values teamwork and harmony. Be respectful and supportive of your colleagues, and avoid interrupting or speaking out of turn during meetings or training sessions. Pay attention to non-verbal cues, as Japanese people often communicate indirectly.\\n\\nSCENE 6: CUSTOMER SERVICE\\n\\n[Host]: Providing exceptional customer service is our top priority. Be polite, attentive, and respectful when interacting with customers. Remember to bow when greeting and thanking them, and address them using their last name with the honorific \\\"san.\\\"\\n\\nSCENE 7: SOCIAL EVENTS\\n\\n[Host]: Building relationships with your colleagues is essential for a harmonious work environment. Be prepared to attend occasional after-work social events, such as dinners or team outings. 
Maintain a level of professionalism during these events and avoid discussing work-related topics unless your colleagues initiate the conversation.\\n\\nCONCLUSION\\n\\n[Host]: By following these Japanese business etiquette norms, you will contribute to a positive and respectful work environment at [Car Wash Company Name]. We are confident that your understanding of these norms will help you succeed in our company and provide exceptional service to our valued customers. Welcome to the team, and we look forward to working with you! \\n\\n    </Conversation A>\\n\\n    That was conversation A, here is conversation B:\\n\\n    <Conversation B>\\n    USER : What are some business etiquette norms when doing business in Japan? \\nASSISTANT B: When doing business in Japan, it is important to be aware of and respect the country's business etiquette norms. Some of these norms include:\\n\\n1. Greetings: A handshake and bow is the standard greeting in Japan. It is important to bow deeply and to offer a firm handshake.\\n2. Business cards: In Japan, business cards are exchanged after the initial greeting. It is important to handle the card with respect and to read it carefully before placing it on the table.\\n3. Meetings: Meetings in Japan are typically formal and conducted in a hierarchical manner. It is important to address people by their titles and to avoid direct eye contact.\\n4. Punctuality: Punctuality is highly valued in Japan. It is important to arrive on time for meetings and to be prepared in advance.\\n5. Gift giving: Gift giving is a common practice in Japan, but it is important to be mindful of the occasion and the relationship. Avoid giving gifts that are considered too personal or extravagant.\\n6. Dining: Dining is an important part of doing business in Japan. It is important to be aware of the proper table manners and to avoid slurping or making loud noises while eating.\\n7. Communication: Communication in Japan is typically indirect and reserved. 
It is important to listen carefully and to avoid confrontational language.\\n\\nOverall, it is important to be respectful, punctual, and mindful of hierarchy when doing business in Japan. \\nUSER : Create a video script for training new employees of a car wash business in Japan. Highlight the above etiquette norms. \\nASSISTANT B: [Opening shot of a car wash business in Japan]\\n\\nNarrator: Welcome to our car wash business in Japan. Today, we will be training new employees on the business etiquette norms in Japan.\\n\\n[Cut to a shot of the trainer]\\n\\nTrainer: Good morning, everyone. My name is [Name] and I will be your trainer for today.\\n\\n[Cut to a shot of the new employees]\\n\\nTrainer: To start off, let's talk about greetings. In Japan, a handshake and bow is the standard greeting. It is important to bow deeply and to offer a firm handshake.\\n\\n[Cut to a shot of a handshake and bow]\\n\\nTrainer: Next, let's talk about business cards. In Japan, business cards are exchanged after the initial greeting. It is important to handle the card with respect and to read it carefully before placing it on the table.\\n\\n[Cut to a shot of exchanging business cards]\\n\\nTrainer: When it comes to meetings, it is important to be aware of the hierarchy. In Japan, meetings are typically formal and conducted in a hierarchical manner. It is important to address people by their titles and to avoid direct eye contact.\\n\\n[Cut to a shot of a meeting]\\n\\nTrainer: Punctuality is highly valued in Japan. It is important to arrive on time for meetings and to be prepared in advance.\\n\\n[Cut to a shot of being punctual]\\n\\nTrainer: Gift giving is a common practice in Japan, but it is important to be mindful of the occasion and the relationship. Avoid giving gifts that are considered too personal or extravagant.\\n\\n[Cut to a shot of gift giving]\\n\\nTrainer: Dining is an important part of doing business in Japan. 
It is important to be aware of the proper table manners and to avoid slurping or making loud noises while eating.\\n\\n[Cut to a shot of dining]\\n\\nTrainer: Finally, communication in Japan is typically indirect and reserved. It is important to listen carefully and to avoid confrontational language.\\n\\n[Cut to a shot of communication]\\n\\nTrainer: That's all for today's training. I hope you all found it helpful. If you have any questions, please don't hesitate to ask. Thank you.\\n\\n[Closing shot of the car wash business]\\n\\nNarrator: If you're interested in joining our car wash business in Japan, please visit our website for more information. Thank you for watching. \\n\\n    </Conversation B>\\n\\n    Please respond with \\\"A\\\" if Assistant A was better and \\\"B\\\" if Assistant B was better. ONLY RETURN \\\"A\\\" OR \\\"B\\\"\\n    \",\n          \"\\n    You are a logical and accurate converation reading and grading AI system.\\n    You will be shown two conversations between USER and ASSISTANT.\\n    Read each conversation carefully and decide which one better complies with the USER's instructions\\n    Please output ONLY \\\"A\\\" if the ASSISTANT in conversation A better complies with the USER's demands, and output only \\\"B\\\" if the ASSISTANT\\n    in conversation B better complies with the USER's demands\\n\\n    <Conversation A>\\n    USER : Please describe the concept of machine learning. Could you elaborate on the differences between supervised, unsupervised, and reinforcement learning? Provide real-world examples of each. 
\\nASSISTANT A: Machine learning is a subfield of artificial intelligence that involves the development of algorithms and statistical models that enable computer systems to learn from data and improve their performance on a specific task without being explicitly programmed.\\n\\nThere are three main types of machine learning: supervised learning, unsupervised learning, and reinforcement learning.\\n\\nSupervised learning involves training a model on a labeled dataset, where the desired output is already known. The model learns to map inputs to outputs by minimizing the difference between its predicted output and the actual output. Examples of supervised learning include image classification, speech recognition, and spam detection.\\n\\nUnsupervised learning involves training a model on an unlabeled dataset, where the desired output is not known. The model learns to identify patterns and structure in the data by clustering similar data points together or reducing the dimensionality of the data. Examples of unsupervised learning include anomaly detection, customer segmentation, and recommendation systems.\\n\\nReinforcement learning involves training a model to make decisions based on feedback from its environment. The model learns to maximize a reward signal by taking actions that lead to positive outcomes and avoiding actions that lead to negative outcomes. 
Examples of reinforcement learning include game playing, robotics, and autonomous driving.\\n\\nReal-world examples of supervised learning include:\\n\\n- Image classification: A model is trained to recognize different objects in images, such as cats, dogs, and cars.\\n- Speech recognition: A model is trained to transcribe spoken words into text.\\n- Spam detection: A model is trained to classify emails as either spam or not spam.\\n\\nReal-world examples of unsupervised learning include:\\n\\n- Anomaly detection: A model is trained to identify unusual patterns in data, such as fraudulent transactions or network intrusions.\\n- Customer segmentation: A model is trained to group customers based on their behavior and preferences, such as frequent shoppers or high spenders.\\n- Recommendation systems: A model is trained to suggest products or services to users based on their past behavior and preferences.\\n\\nReal-world examples of reinforcement learning include:\\n\\n- Game playing: A model is trained to play games, such as chess or Go, by learning from its wins and losses.\\n- Robotics: A model is trained to control a robot to perform tasks, such as picking up objects or navigating through a maze.\\n- Autonomous driving: A model is trained to make driving decisions, such as accelerating, braking, and turning, based on feedback from sensors and cameras. \\nUSER : In your last example of reinforcement learning, can we use supervised learning to solve it? \\nASSISTANT A: In the case of autonomous driving, supervised learning can be used to train a model to recognize objects in the environment, such as other vehicles, pedestrians, and traffic signs. However, supervised learning alone may not be sufficient to enable the vehicle to make decisions in real-time based on feedback from its environment.\\n\\nReinforcement learning is better suited for this task because it allows the vehicle to learn from its own experiences and adapt to changing conditions. 
The model can learn to make decisions based on feedback from its sensors and cameras, such as adjusting its speed or changing lanes to avoid obstacles.\\n\\nIn summary, while supervised learning can be used to train a model to recognize objects in the environment, reinforcement learning is better suited for enabling the vehicle to make decisions in real-time based on feedback from its environment. \\n\\n    </Conversation A>\\n\\n    That was conversation A, here is conversation B:\\n\\n    <Conversation B>\\n    USER : Please describe the concept of machine learning. Could you elaborate on the differences between supervised, unsupervised, and reinforcement learning? Provide real-world examples of each. \\nASSISTANT B: Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data.\\nMachine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data.\\nMachine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data.\\nMachine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. 
Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. 
Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. 
Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence that allows computers to learn without being explicitly programmed. It is a subset of AI that focuses on the development of algorithms that can learn from and make predictions on data. Machine learning is a type of artificial intelligence tha \\nUSER : In your last example of reinforcement learning, can we use supervised learning to solve it? \\nASSISTANT B: Yes, we can use supervised learning to solve it. \\n\\n    </Conversation B>\\n\\n    Please respond with \\\"A\\\" if Assistant A was better and \\\"B\\\" if Assistant B was better. ONLY RETURN \\\"A\\\" OR \\\"B\\\"\\n    \",\n          \"\\n    You are a logical and accurate converation reading and grading AI system.\\n    You will be shown two conversations between USER and ASSISTANT.\\n    Read each conversation carefully and decide which one better complies with the USER's instructions\\n    Please output ONLY \\\"A\\\" if the ASSISTANT in conversation A better complies with the USER's demands, and output only \\\"B\\\" if the ASSISTANT\\n    in conversation B better complies with the USER's demands\\n\\n    <Conversation A>\\n    USER : Craft an intriguing opening paragraph for a fictional short story. The story should involve a character who wakes up one morning to find that they can time travel. \\nASSISTANT A: The morning sun cast a warm glow on the walls of Amelia's bedroom, as she groggily opened her eyes to a new day. As she stretched, her mind still foggy with remnants of dreams, a strange sensation washed over her \\u2013 a feeling of boundless freedom, as if the very fabric of time had unraveled in her hands. 
Little did she know, as she swung her legs over the side of the bed, that today would be the day she discovered an extraordinary gift hidden within her. A gift that would allow her to traverse the infinite corridors of time, and become the master of her own destiny. \\nUSER : Summarize the story with three bullet points using only nouns and adjectives, without verbs. \\nASSISTANT A: - Amelia, time-traveling gift\\n- Boundless freedom, destiny control\\n- Infinite corridors, extraordinary discovery \\n\\n    </Conversation A>\\n\\n    That was conversation A, here is conversation B:\\n\\n    <Conversation B>\\n    USER : Craft an intriguing opening paragraph for a fictional short story. The story should involve a character who wakes up one morning to find that they can time travel. \\nASSISTANT B: As the sun rose that morning, a gentle breeze blew through the city streets. The sound of birds singing and the distant rumbling of thunder filled the air. Little did the people know, a mysterious figure was awakening within their midst, someone who would soon change their world forever. \\n\\nThe figure opened their eyes and looked around. Everything seemed the same, yet different. They got up and looked in the mirror, only to see their own face, but with a spark of something new. They had a newfound power, the ability to travel through time. \\n\\nThe figure was filled with excitement and anticipation. With a new sense of purpose, they set off on a journey, not knowing where the day would take them, but ready to find out. \\nUSER : Summarize the story with three bullet points using only nouns and adjectives, without verbs. \\nASSISTANT B: \\u2022 A mysterious figure awakens with a new power.\\n\\u2022 An exciting journey filled with anticipation.\\n\\u2022 A newfound ability to travel through time. \\n\\n    </Conversation B>\\n\\n    Please respond with \\\"A\\\" if Assistant A was better and \\\"B\\\" if Assistant B was better. 
ONLY RETURN \\\"A\\\" OR \\\"B\\\"\\n    \"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"score_results\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe",
+              "variable_name": "for_llm_df"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-b56a8cc5-a165-4260-8f5f-6093edd077cd\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>question_id</th>\n",
+              "      <th>model_a</th>\n",
+              "      <th>model_b</th>\n",
+              "      <th>conversation_a</th>\n",
+              "      <th>conversation_b</th>\n",
+              "      <th>conversation_prompt_text</th>\n",
+              "      <th>score_results</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>\\n    You are a logical and accurate converati...</td>\n",
+              "      <td>{'B': 0.94, 'A': 0.06}</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>81</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>vicuna-13b-v1.2</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>\\n    You are a logical and accurate converati...</td>\n",
+              "      <td>{'A': 1.0, 'B': 0.0}</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>81</td>\n",
+              "      <td>claude-v1</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>\\n    You are a logical and accurate converati...</td>\n",
+              "      <td>{'A': 1.0, 'B': 0.0}</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>81</td>\n",
+              "      <td>claude-v1</td>\n",
+              "      <td>llama-13b</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>\\n    You are a logical and accurate converati...</td>\n",
+              "      <td>{'A': 1.0, 'B': 0.0}</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>10</th>\n",
+              "      <td>81</td>\n",
+              "      <td>gpt-3.5-turbo</td>\n",
+              "      <td>alpaca-13b</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>[{'content': 'Compose an engaging travel blog ...</td>\n",
+              "      <td>\\n    You are a logical and accurate converati...</td>\n",
+              "      <td>{'A': 1.0, 'B': 0.0}</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b56a8cc5-a165-4260-8f5f-6093edd077cd')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-b56a8cc5-a165-4260-8f5f-6093edd077cd button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-b56a8cc5-a165-4260-8f5f-6093edd077cd');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-43cc108d-b88b-432f-9ea3-8e9e98c07766\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-43cc108d-b88b-432f-9ea3-8e9e98c07766')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-43cc108d-b88b-432f-9ea3-8e9e98c07766 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "    question_id        model_a          model_b  \\\n",
+              "0            81     alpaca-13b    gpt-3.5-turbo   \n",
+              "4            81     alpaca-13b  vicuna-13b-v1.2   \n",
+              "6            81      claude-v1       alpaca-13b   \n",
+              "8            81      claude-v1        llama-13b   \n",
+              "10           81  gpt-3.5-turbo       alpaca-13b   \n",
+              "\n",
+              "                                       conversation_a  \\\n",
+              "0   [{'content': 'Compose an engaging travel blog ...   \n",
+              "4   [{'content': 'Compose an engaging travel blog ...   \n",
+              "6   [{'content': 'Compose an engaging travel blog ...   \n",
+              "8   [{'content': 'Compose an engaging travel blog ...   \n",
+              "10  [{'content': 'Compose an engaging travel blog ...   \n",
+              "\n",
+              "                                       conversation_b  \\\n",
+              "0   [{'content': 'Compose an engaging travel blog ...   \n",
+              "4   [{'content': 'Compose an engaging travel blog ...   \n",
+              "6   [{'content': 'Compose an engaging travel blog ...   \n",
+              "8   [{'content': 'Compose an engaging travel blog ...   \n",
+              "10  [{'content': 'Compose an engaging travel blog ...   \n",
+              "\n",
+              "                             conversation_prompt_text           score_results  \n",
+              "0   \\n    You are a logical and accurate converati...  {'B': 0.94, 'A': 0.06}  \n",
+              "4   \\n    You are a logical and accurate converati...    {'A': 1.0, 'B': 0.0}  \n",
+              "6   \\n    You are a logical and accurate converati...    {'A': 1.0, 'B': 0.0}  \n",
+              "8   \\n    You are a logical and accurate converati...    {'A': 1.0, 'B': 0.0}  \n",
+              "10  \\n    You are a logical and accurate converati...    {'A': 1.0, 'B': 0.0}  "
+            ]
+          },
+          "execution_count": 57,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "for_llm_df.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "95c51bbe-af24-4d36-bd5e-604f20a2444a",
+      "metadata": {
+        "id": "95c51bbe-af24-4d36-bd5e-604f20a2444a"
+      },
+      "source": [
+        "# Step 4: Consolidate the fetched LLM scores with the human judges' labels and run CROWDLAB"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 66,
+      "id": "8ad23596-3b62-45e6-b0bf-94c7cd7febbc",
+      "metadata": {
+        "id": "8ad23596-3b62-45e6-b0bf-94c7cd7febbc"
+      },
+      "outputs": [],
+      "source": [
+        "score_results_only = for_llm_df.set_index(['question_id', 'model_a', 'model_b'])[['score_results']]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 67,
+      "id": "8a2373a1-e346-4f04-8c16-1cd7fd3718bb",
+      "metadata": {
+        "id": "8a2373a1-e346-4f04-8c16-1cd7fd3718bb"
+      },
+      "outputs": [],
+      "source": [
+        "score_results_only = score_results_only[score_results_only.index.isin(human_df_per_judge.index)]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 69,
+      "id": "339cbe3b-b567-4f4b-ada0-7c7d64827c01",
+      "metadata": {
+        "id": "339cbe3b-b567-4f4b-ada0-7c7d64827c01"
+      },
+      "outputs": [],
+      "source": [
+        "score_results_only['A'] = score_results_only['score_results'].apply(lambda d: d.get('A',0))\n",
+        "score_results_only['B'] = score_results_only['score_results'].apply(lambda d: d.get('B',0))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 70,
+      "id": "da90a2d6-347f-4f2c-88ff-b1086b0c2fbb",
+      "metadata": {
+        "id": "da90a2d6-347f-4f2c-88ff-b1086b0c2fbb"
+      },
+      "outputs": [],
+      "source": [
+        "results = get_label_quality_multiannotator(human_df_per_judge, score_results_only[['A', 'B']].to_numpy(), verbose=False)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 85,
+      "id": "uNO-c8sSOAbE",
+      "metadata": {
+        "id": "uNO-c8sSOAbE"
+      },
+      "outputs": [],
+      "source": [
+        "consensus_results = results[\"label_quality\"]\n",
+        "consensus_results[\"consensus_label\"] = consensus_results[\"consensus_label\"].apply(lambda i: {0:\"A\",1:\"B\"}.get(i))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 88,
+      "id": "2e217e2f-1e2c-4fe1-8d7c-70c4a4cc12c7",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 238
+        },
+        "id": "2e217e2f-1e2c-4fe1-8d7c-70c4a4cc12c7",
+        "outputId": "4bbc265f-eba3-4d02-d6d3-83b896cab295"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"consensus_results\",\n  \"rows\": 1091,\n  \"fields\": [\n    {\n      \"column\": \"consensus_label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"A\",\n          \"B\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"consensus_quality_score\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.13791291238321607,\n        \"min\": 0.48043174792780696,\n        \"max\": 0.9931679356292095,\n        \"num_unique_values\": 575,\n        \"samples\": [\n          0.9587694594799994,\n          0.9034262656133794\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"annotator_agreement\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.3622729337246458,\n        \"min\": 0.0,\n        \"max\": 1.0,\n        \"num_unique_values\": 6,\n        \"samples\": [\n          1.0,\n          0.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"num_annotations\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 5,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          1,\n          5\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe",
+              "variable_name": "consensus_results"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-3099109e-2e0f-4036-928c-15ffe924e19d\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>consensus_label</th>\n",
+              "      <th>consensus_quality_score</th>\n",
+              "      <th>annotator_agreement</th>\n",
+              "      <th>num_annotations</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>question_id</th>\n",
+              "      <th>model_a</th>\n",
+              "      <th>model_b</th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"5\" valign=\"top\">81</th>\n",
+              "      <th rowspan=\"2\" valign=\"top\">alpaca-13b</th>\n",
+              "      <th>gpt-3.5-turbo</th>\n",
+              "      <td>B</td>\n",
+              "      <td>0.911493</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>vicuna-13b-v1.2</th>\n",
+              "      <td>A</td>\n",
+              "      <td>0.615063</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"2\" valign=\"top\">claude-v1</th>\n",
+              "      <th>alpaca-13b</th>\n",
+              "      <td>A</td>\n",
+              "      <td>0.956241</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>llama-13b</th>\n",
+              "      <td>A</td>\n",
+              "      <td>0.954403</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>gpt-3.5-turbo</th>\n",
+              "      <th>alpaca-13b</th>\n",
+              "      <td>A</td>\n",
+              "      <td>0.953620</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3099109e-2e0f-4036-928c-15ffe924e19d')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-3099109e-2e0f-4036-928c-15ffe924e19d button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-3099109e-2e0f-4036-928c-15ffe924e19d');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-f6c2f926-fd9a-4b15-85da-a44d22d92ba5\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f6c2f926-fd9a-4b15-85da-a44d22d92ba5')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-f6c2f926-fd9a-4b15-85da-a44d22d92ba5 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "                                          consensus_label  \\\n",
+              "question_id model_a       model_b                           \n",
+              "81          alpaca-13b    gpt-3.5-turbo                 B   \n",
+              "                          vicuna-13b-v1.2               A   \n",
+              "            claude-v1     alpaca-13b                    A   \n",
+              "                          llama-13b                     A   \n",
+              "            gpt-3.5-turbo alpaca-13b                    A   \n",
+              "\n",
+              "                                           consensus_quality_score  \\\n",
+              "question_id model_a       model_b                                    \n",
+              "81          alpaca-13b    gpt-3.5-turbo                   0.911493   \n",
+              "                          vicuna-13b-v1.2                 0.615063   \n",
+              "            claude-v1     alpaca-13b                      0.956241   \n",
+              "                          llama-13b                       0.954403   \n",
+              "            gpt-3.5-turbo alpaca-13b                      0.953620   \n",
+              "\n",
+              "                                           annotator_agreement  \\\n",
+              "question_id model_a       model_b                                \n",
+              "81          alpaca-13b    gpt-3.5-turbo                    1.0   \n",
+              "                          vicuna-13b-v1.2                  0.0   \n",
+              "            claude-v1     alpaca-13b                       1.0   \n",
+              "                          llama-13b                        1.0   \n",
+              "            gpt-3.5-turbo alpaca-13b                       1.0   \n",
+              "\n",
+              "                                           num_annotations  \n",
+              "question_id model_a       model_b                           \n",
+              "81          alpaca-13b    gpt-3.5-turbo                  2  \n",
+              "                          vicuna-13b-v1.2                1  \n",
+              "            claude-v1     alpaca-13b                     1  \n",
+              "                          llama-13b                      1  \n",
+              "            gpt-3.5-turbo alpaca-13b                     2  "
+            ]
+          },
+          "execution_count": 88,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "consensus_results.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 89,
+      "id": "09b743a8-bf5b-4add-8ce8-98fc35809436",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 394
+        },
+        "id": "09b743a8-bf5b-4add-8ce8-98fc35809436",
+        "outputId": "13874a5d-e230-4239-f15d-bceab94e3d57"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"results[\\\"annotator_stats\\\"]\",\n  \"rows\": 10,\n  \"fields\": [\n    {\n      \"column\": \"annotator_quality\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.08737618262064926,\n        \"min\": 0.46762958915518915,\n        \"max\": 0.7654552550414828,\n        \"num_unique_values\": 10,\n        \"samples\": [\n          0.7301574908485666,\n          0.5644493522300893,\n          0.6830310475986654\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"agreement_with_consensus\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.13877484906170579,\n        \"min\": 0.3333333333333333,\n        \"max\": 0.8235294117647058,\n        \"num_unique_values\": 9,\n        \"samples\": [\n          0.7727272727272727,\n          0.5625,\n          0.6666666666666666\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"worst_class\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"A\",\n          \"B\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"num_examples_labeled\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 5,\n        \"min\": 3,\n        \"max\": 22,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          3,\n          16\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-62646bd5-bfc0-4f8c-87ae-f91aba8f770f\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>annotator_quality</th>\n",
+              "      <th>agreement_with_consensus</th>\n",
+              "      <th>worst_class</th>\n",
+              "      <th>num_examples_labeled</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th></th>\n",
+              "      <th>judge</th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"10\" valign=\"top\">winner_binary</th>\n",
+              "      <th>author_1</th>\n",
+              "      <td>0.467630</td>\n",
+              "      <td>0.333333</td>\n",
+              "      <td>B</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_5</th>\n",
+              "      <td>0.564449</td>\n",
+              "      <td>0.562500</td>\n",
+              "      <td>B</td>\n",
+              "      <td>16</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_35</th>\n",
+              "      <td>0.621383</td>\n",
+              "      <td>0.625000</td>\n",
+              "      <td>A</td>\n",
+              "      <td>16</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_48</th>\n",
+              "      <td>0.665313</td>\n",
+              "      <td>0.611111</td>\n",
+              "      <td>A</td>\n",
+              "      <td>18</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_20</th>\n",
+              "      <td>0.675550</td>\n",
+              "      <td>0.700000</td>\n",
+              "      <td>B</td>\n",
+              "      <td>20</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_47</th>\n",
+              "      <td>0.683031</td>\n",
+              "      <td>0.700000</td>\n",
+              "      <td>A</td>\n",
+              "      <td>20</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_40</th>\n",
+              "      <td>0.699576</td>\n",
+              "      <td>0.666667</td>\n",
+              "      <td>A</td>\n",
+              "      <td>21</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_53</th>\n",
+              "      <td>0.711944</td>\n",
+              "      <td>0.764706</td>\n",
+              "      <td>A</td>\n",
+              "      <td>17</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_28</th>\n",
+              "      <td>0.730157</td>\n",
+              "      <td>0.772727</td>\n",
+              "      <td>B</td>\n",
+              "      <td>22</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_45</th>\n",
+              "      <td>0.765455</td>\n",
+              "      <td>0.823529</td>\n",
+              "      <td>B</td>\n",
+              "      <td>17</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-62646bd5-bfc0-4f8c-87ae-f91aba8f770f')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-62646bd5-bfc0-4f8c-87ae-f91aba8f770f button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-62646bd5-bfc0-4f8c-87ae-f91aba8f770f');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-c1323046-087e-4f8f-9160-633ab964fb48\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-c1323046-087e-4f8f-9160-633ab964fb48')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-c1323046-087e-4f8f-9160-633ab964fb48 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "                         annotator_quality  agreement_with_consensus  \\\n",
+              "              judge                                                    \n",
+              "winner_binary author_1            0.467630                  0.333333   \n",
+              "              expert_5            0.564449                  0.562500   \n",
+              "              expert_35           0.621383                  0.625000   \n",
+              "              expert_48           0.665313                  0.611111   \n",
+              "              expert_20           0.675550                  0.700000   \n",
+              "              expert_47           0.683031                  0.700000   \n",
+              "              expert_40           0.699576                  0.666667   \n",
+              "              expert_53           0.711944                  0.764706   \n",
+              "              expert_28           0.730157                  0.772727   \n",
+              "              expert_45           0.765455                  0.823529   \n",
+              "\n",
+              "                        worst_class  num_examples_labeled  \n",
+              "              judge                                        \n",
+              "winner_binary author_1            B                     3  \n",
+              "              expert_5            B                    16  \n",
+              "              expert_35           A                    16  \n",
+              "              expert_48           A                    18  \n",
+              "              expert_20           B                    20  \n",
+              "              expert_47           A                    20  \n",
+              "              expert_40           A                    21  \n",
+              "              expert_53           A                    17  \n",
+              "              expert_28           B                    22  \n",
+              "              expert_45           B                    17  "
+            ]
+          },
+          "execution_count": 89,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "results[\"annotator_stats\"][\"worst_class\"] = results[\"annotator_stats\"][\"worst_class\"].apply(lambda i: {0:\"A\",1:\"B\"}.get(i))\n",
+        "results[\"annotator_stats\"].head(10)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "94ff660d-a80c-4364-9e93-67c8469870c5",
+      "metadata": {
+        "id": "94ff660d-a80c-4364-9e93-67c8469870c5"
+      },
+      "source": [
+        "Do \"authors\" do worse than \"experts\"?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 73,
+      "id": "14bb08cf-78af-4a76-8e40-13cc2655a02c",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 455
+        },
+        "id": "14bb08cf-78af-4a76-8e40-13cc2655a02c",
+        "outputId": "ff33e0b2-e4c9-44fd-c932-977c9b0e4aa0"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"results[\\\"annotator_stats\\\"]\",\n  \"rows\": 65,\n  \"fields\": [\n    {\n      \"column\": \"annotator_quality\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.09424328050654888,\n        \"min\": 0.46762958915518915,\n        \"max\": 0.9674349619886354,\n        \"num_unique_values\": 64,\n        \"samples\": [\n          0.7807521348905537,\n          0.6830310475986654,\n          0.9674349619886354\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"agreement_with_consensus\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.11832997056404852,\n        \"min\": 0.3333333333333333,\n        \"max\": 1.0,\n        \"num_unique_values\": 37,\n        \"samples\": [\n          0.8421052631578947,\n          0.9047619047619048,\n          0.8636363636363636\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"worst_class\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"num_examples_labeled\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 16,\n        \"min\": 1,\n        \"max\": 88,\n        \"num_unique_values\": 20,\n        \"samples\": [\n          3,\n          14\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-6499f1bb-9e26-440b-bc1f-623591eb0166\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>annotator_quality</th>\n",
+              "      <th>agreement_with_consensus</th>\n",
+              "      <th>worst_class</th>\n",
+              "      <th>num_examples_labeled</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th></th>\n",
+              "      <th>judge</th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"11\" valign=\"top\">winner_binary</th>\n",
+              "      <th>expert_30</th>\n",
+              "      <td>0.967435</td>\n",
+              "      <td>1.000000</td>\n",
+              "      <td>0</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_52</th>\n",
+              "      <td>0.955679</td>\n",
+              "      <td>1.000000</td>\n",
+              "      <td>0</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_3</th>\n",
+              "      <td>0.952204</td>\n",
+              "      <td>1.000000</td>\n",
+              "      <td>0</td>\n",
+              "      <td>17</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_39</th>\n",
+              "      <td>0.946821</td>\n",
+              "      <td>0.947368</td>\n",
+              "      <td>0</td>\n",
+              "      <td>19</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_4</th>\n",
+              "      <td>0.943187</td>\n",
+              "      <td>0.952381</td>\n",
+              "      <td>1</td>\n",
+              "      <td>21</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_48</th>\n",
+              "      <td>0.665313</td>\n",
+              "      <td>0.611111</td>\n",
+              "      <td>0</td>\n",
+              "      <td>18</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_35</th>\n",
+              "      <td>0.621383</td>\n",
+              "      <td>0.625000</td>\n",
+              "      <td>0</td>\n",
+              "      <td>16</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_5</th>\n",
+              "      <td>0.564449</td>\n",
+              "      <td>0.562500</td>\n",
+              "      <td>1</td>\n",
+              "      <td>16</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>author_1</th>\n",
+              "      <td>0.467630</td>\n",
+              "      <td>0.333333</td>\n",
+              "      <td>1</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>expert_33</th>\n",
+              "      <td>NaN</td>\n",
+              "      <td>0.600000</td>\n",
+              "      <td>1</td>\n",
+              "      <td>5</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>65 rows × 4 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6499f1bb-9e26-440b-bc1f-623591eb0166')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-6499f1bb-9e26-440b-bc1f-623591eb0166 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-6499f1bb-9e26-440b-bc1f-623591eb0166');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-f52f9aaf-b669-4c4e-8490-dc937464e638\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f52f9aaf-b669-4c4e-8490-dc937464e638')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-f52f9aaf-b669-4c4e-8490-dc937464e638 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "                         annotator_quality  agreement_with_consensus  \\\n",
+              "              judge                                                    \n",
+              "winner_binary expert_30           0.967435                  1.000000   \n",
+              "              expert_52           0.955679                  1.000000   \n",
+              "              expert_3            0.952204                  1.000000   \n",
+              "              expert_39           0.946821                  0.947368   \n",
+              "              expert_4            0.943187                  0.952381   \n",
+              "...                                    ...                       ...   \n",
+              "              expert_48           0.665313                  0.611111   \n",
+              "              expert_35           0.621383                  0.625000   \n",
+              "              expert_5            0.564449                  0.562500   \n",
+              "              author_1            0.467630                  0.333333   \n",
+              "              expert_33                NaN                  0.600000   \n",
+              "\n",
+              "                         worst_class  num_examples_labeled  \n",
+              "              judge                                         \n",
+              "winner_binary expert_30            0                     3  \n",
+              "              expert_52            0                     1  \n",
+              "              expert_3             0                    17  \n",
+              "              expert_39            0                    19  \n",
+              "              expert_4             1                    21  \n",
+              "...                              ...                   ...  \n",
+              "              expert_48            0                    18  \n",
+              "              expert_35            0                    16  \n",
+              "              expert_5             1                    16  \n",
+              "              author_1             1                     3  \n",
+              "              expert_33            1                     5  \n",
+              "\n",
+              "[65 rows x 4 columns]"
+            ]
+          },
+          "execution_count": 73,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "results[\"annotator_stats\"].sort_values(\"annotator_quality\", ascending=False)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 77,
+      "id": "1209deee-9fa0-412d-80c2-f4f932d3f1ef",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 112
+        },
+        "id": "1209deee-9fa0-412d-80c2-f4f932d3f1ef",
+        "outputId": "645f42aa-03f2-40a1-8c06-5ef8dd8821b3"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "summary": "{\n  \"name\": \"results[\\\"annotator_stats\\\"]\",\n  \"rows\": 2,\n  \"fields\": [\n    {\n      \"column\": \"count\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 35,\n        \"min\": 7,\n        \"max\": 57,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          57,\n          7\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"mean\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.021910934857793274,\n        \"min\": 0.804958057989469,\n        \"max\": 0.8359447992296336,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0.8359447992296336,\n          0.804958057989469\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"median\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.004272229123946614,\n        \"min\": 0.8518819020676294,\n        \"max\": 0.85792374643628,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0.85792374643628,\n          0.8518819020676294\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
+              "type": "dataframe"
+            },
+            "text/html": [
+              "\n",
+              "  <div id=\"df-fbd89890-e992-4c87-a2bb-3c379b0d6b41\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>count</th>\n",
+              "      <th>mean</th>\n",
+              "      <th>median</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>False</th>\n",
+              "      <td>7</td>\n",
+              "      <td>0.804958</td>\n",
+              "      <td>0.851882</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>True</th>\n",
+              "      <td>57</td>\n",
+              "      <td>0.835945</td>\n",
+              "      <td>0.857924</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fbd89890-e992-4c87-a2bb-3c379b0d6b41')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-fbd89890-e992-4c87-a2bb-3c379b0d6b41 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-fbd89890-e992-4c87-a2bb-3c379b0d6b41');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-f56ae406-257e-4b7f-bb2b-0c778a7e645b\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f56ae406-257e-4b7f-bb2b-0c778a7e645b')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-f56ae406-257e-4b7f-bb2b-0c778a7e645b button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "       count      mean    median\n",
+              "False      7  0.804958  0.851882\n",
+              "True      57  0.835945  0.857924"
+            ]
+          },
+          "execution_count": 77,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "results[\"annotator_stats\"].groupby(results[\"annotator_stats\"].index.get_level_values(1).str.contains('expert'))['annotator_quality'].agg(['count', 'mean', 'median'])\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "87d37120-cd8c-4ce7-ac4e-a1e4c3ec19a3",
+      "metadata": {
+        "id": "87d37120-cd8c-4ce7-ac4e-a1e4c3ec19a3"
+      },
+      "source": [
+        "Experts and authors seem to have roughly similar annotator quality! That's a neat observation, especially since we don't have ground-truth labels."
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "include_colab_link": true,
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.13"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "016c4f9c04b74ebbbbbc7f911adfcd42": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0593582beaa34d4b87282fa14276585c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "05d5ef9d6f65409ab584a88cb8d0461b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "079434f2d9164264a8408aaf28ee6db9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_af16803419b34f7ab15ac8b33d6dadfb",
+            "placeholder": "​",
+            "style": "IPY_MODEL_79ab684f0c9c4ad5afbb137d8d4693c0",
+            "value": " 739k/739k [00:00&lt;00:00, 3.16MB/s]"
+          }
+        },
+        "088be0b3fd9c45b4b46d42dce24d74db": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_dbc1a1892b984847b87215c43e77e6df",
+              "IPY_MODEL_eaff1c432bc64bb09d48abf1c54a0c19",
+              "IPY_MODEL_f8cdc7f86a90420d90a5e186d57d7c73"
+            ],
+            "layout": "IPY_MODEL_117d5fb668ad430c90cb66f373ac4c67"
+          }
+        },
+        "08db9645f6714d1faaee229c6f20e39d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "0dd58e70dc2c483b94aa209848f829f0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_86f076064f1e4f4bbf8512f63b7ec1d9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_47c35d402c76496396356823e7e5529f",
+            "value": "Creating json from Arrow format: 100%"
+          }
+        },
+        "10efe147ad7e4d8099fe7c5c78896e01": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "117d5fb668ad430c90cb66f373ac4c67": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "13128542a16446bbbe00ea10ef901cbb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_ef7ecc82f9484316b9f3f5b05332f5c7",
+              "IPY_MODEL_e2e9470eff1448ecb13922a0762c4f62",
+              "IPY_MODEL_fe87dbdb92dc43f7963f9b9a87a08df0"
+            ],
+            "layout": "IPY_MODEL_fc41d9d6971b4f7cafa4247a82d59dec"
+          }
+        },
+        "1b2e5ab1ca454f78be34f6e4f2da7e88": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1f276ec1e4b84f599200bca5c14a1f61": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "20a5a2d7eb7c4d9bbc04a72188facdd4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "21de204374fe47f9872f70b32c0e174c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "282ed582df3840099d1c7c2ae9e8940e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3464f8ec3901488b8799bc71f22083ef": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "350f203cc54842618a7a408374c470f1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_eb5b50da89e944dabb0bbadb76ccc447",
+            "placeholder": "​",
+            "style": "IPY_MODEL_3f35927ec50a49e1bb81cb6d46d9bf7f",
+            "value": "Downloading data: 100%"
+          }
+        },
+        "3f35927ec50a49e1bb81cb6d46d9bf7f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "403c363ff9ce44cb9686259db0fe31e6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4165bcffeb484d1f8b10728a55ac64fe": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_7a9272c23aa745d4b2bc073e72e18cc3",
+              "IPY_MODEL_ff70ea10be5d40b59bd357afe599e6bf",
+              "IPY_MODEL_896a952d146a4bc9ad2b035828df470a"
+            ],
+            "layout": "IPY_MODEL_716e16fe466a4cd4a2f815101816c3cd"
+          }
+        },
+        "435196039a964bbbbdb1b8b498ad9a88": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "47c35d402c76496396356823e7e5529f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4b42498db3a04532b8189257528b2148": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_81c98be02f9f41d682c00b2344f5682b",
+              "IPY_MODEL_6ee9ae4b7d2f4c788c2208f7696cc8db",
+              "IPY_MODEL_e005079ce16649c48cd9b38f05d5d24d"
+            ],
+            "layout": "IPY_MODEL_1b2e5ab1ca454f78be34f6e4f2da7e88"
+          }
+        },
+        "51339d5fe66b4d85a13c1ef9ad062776": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "51e3fabc729d4eb59e4d0409a93ab4bb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "552f2c362f6b46958dae49c6c21c3c1d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "55853062fa1c4567b54e8202dc3ea9fa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "58aff3879a4f4858b170a214b3f74cc6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "5bbd468db6a8451d91ce8b833e4b38d6": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "5cde2bf3bce545008b811be5afb68262": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "5ed508b45ed140e692f2f9d0a446c9f0": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "69914ef99c5d44b48690888610766e60": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "6dd5334fcc8648ed8a72164899335a13": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "6ee9ae4b7d2f4c788c2208f7696cc8db": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_69914ef99c5d44b48690888610766e60",
+            "max": 2000,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_08db9645f6714d1faaee229c6f20e39d",
+            "value": 2000
+          }
+        },
+        "716e16fe466a4cd4a2f815101816c3cd": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "79ab684f0c9c4ad5afbb137d8d4693c0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "7a9272c23aa745d4b2bc073e72e18cc3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_05d5ef9d6f65409ab584a88cb8d0461b",
+            "placeholder": "​",
+            "style": "IPY_MODEL_a88c99216ab8471786687f980b039be7",
+            "value": "Generating human split: 100%"
+          }
+        },
+        "81c98be02f9f41d682c00b2344f5682b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e19558f2eb3d49328a0d4cc800a0b4ad",
+            "placeholder": "​",
+            "style": "IPY_MODEL_5bbd468db6a8451d91ce8b833e4b38d6",
+            "value": "Downloading readme: 100%"
+          }
+        },
+        "83b32b1b16fc47fabc0a6cdb80d393e9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_51e3fabc729d4eb59e4d0409a93ab4bb",
+            "max": 3,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_b0c7ecd44ccd490f94e33182f59d9fcb",
+            "value": 3
+          }
+        },
+        "86f076064f1e4f4bbf8512f63b7ec1d9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "896a952d146a4bc9ad2b035828df470a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_58aff3879a4f4858b170a214b3f74cc6",
+            "placeholder": "​",
+            "style": "IPY_MODEL_c3d846a22d684009994bf4592bb43922",
+            "value": " 3355/3355 [00:00&lt;00:00, 4629.80 examples/s]"
+          }
+        },
+        "896e188c5b3c45f1b8488abefc79b733": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8d78f2ed550d41418e7854c384c52463": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8ed4dc8f8c074d7a840c6b02687d6d9c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_350f203cc54842618a7a408374c470f1",
+              "IPY_MODEL_c8cf37674d284ed88148a96671b20b65",
+              "IPY_MODEL_079434f2d9164264a8408aaf28ee6db9"
+            ],
+            "layout": "IPY_MODEL_3464f8ec3901488b8799bc71f22083ef"
+          }
+        },
+        "9fb5588c5f7d418691fc5ce815d42383": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_403c363ff9ce44cb9686259db0fe31e6",
+            "max": 4,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_ec5c3cee541e4144976c3e64aa6e69ce",
+            "value": 4
+          }
+        },
+        "a88c99216ab8471786687f980b039be7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "a8d6995b42df46959b1c55267f18c6e4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "aa9e5107279d46fca40c992b03102c52": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_55853062fa1c4567b54e8202dc3ea9fa",
+            "placeholder": "​",
+            "style": "IPY_MODEL_5cde2bf3bce545008b811be5afb68262",
+            "value": " 4/4 [00:02&lt;00:00,  2.01ba/s]"
+          }
+        },
+        "af16803419b34f7ab15ac8b33d6dadfb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b0c7ecd44ccd490f94e33182f59d9fcb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "c3d846a22d684009994bf4592bb43922": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "c5dad19757724fb8a5f95069dcb282a4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_0dd58e70dc2c483b94aa209848f829f0",
+              "IPY_MODEL_83b32b1b16fc47fabc0a6cdb80d393e9",
+              "IPY_MODEL_fb0aec799c1a40f9a9b148921a6421fa"
+            ],
+            "layout": "IPY_MODEL_016c4f9c04b74ebbbbbc7f911adfcd42"
+          }
+        },
+        "c72e9abef718441baf1ae313c8e5dbf9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c8cf37674d284ed88148a96671b20b65": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_51339d5fe66b4d85a13c1ef9ad062776",
+            "max": 738612,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_a8d6995b42df46959b1c55267f18c6e4",
+            "value": 738612
+          }
+        },
+        "d356a48df95a4177bb5a6355d61df202": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "d4a3531f15084bd4ab0b4c05c35425d9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d76a9a5b57874f8aad5398ce749f9886": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "dbc1a1892b984847b87215c43e77e6df": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d76a9a5b57874f8aad5398ce749f9886",
+            "placeholder": "​",
+            "style": "IPY_MODEL_d356a48df95a4177bb5a6355d61df202",
+            "value": "Downloading data: 100%"
+          }
+        },
+        "dd3a3867381b494aa04e301c66c102d1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e005079ce16649c48cd9b38f05d5d24d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d4a3531f15084bd4ab0b4c05c35425d9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_eeb1766106b84da7a41bf86de2597cbb",
+            "value": " 2.00k/2.00k [00:00&lt;00:00, 8.98kB/s]"
+          }
+        },
+        "e19558f2eb3d49328a0d4cc800a0b4ad": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e2e9470eff1448ecb13922a0762c4f62": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_435196039a964bbbbdb1b8b498ad9a88",
+            "max": 2400,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_e601948125fe411fb61fb97bacfc275f",
+            "value": 2400
+          }
+        },
+        "e601948125fe411fb61fb97bacfc275f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "eaff1c432bc64bb09d48abf1c54a0c19": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_10efe147ad7e4d8099fe7c5c78896e01",
+            "max": 649996,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_552f2c362f6b46958dae49c6c21c3c1d",
+            "value": 649996
+          }
+        },
+        "eb5b50da89e944dabb0bbadb76ccc447": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ec5c3cee541e4144976c3e64aa6e69ce": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "eeb1766106b84da7a41bf86de2597cbb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ef7ecc82f9484316b9f3f5b05332f5c7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_5ed508b45ed140e692f2f9d0a446c9f0",
+            "placeholder": "​",
+            "style": "IPY_MODEL_282ed582df3840099d1c7c2ae9e8940e",
+            "value": "Generating gpt4_pair split: 100%"
+          }
+        },
+        "f0900425ee8e46beb75d33697c80bddd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "f3ce70515e5b4f648e0472aa0cce5bf3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f8be2158142447f19bba5889357abc1c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_8d78f2ed550d41418e7854c384c52463",
+            "placeholder": "​",
+            "style": "IPY_MODEL_1f276ec1e4b84f599200bca5c14a1f61",
+            "value": "Creating json from Arrow format: 100%"
+          }
+        },
+        "f8cdc7f86a90420d90a5e186d57d7c73": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c72e9abef718441baf1ae313c8e5dbf9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_f0900425ee8e46beb75d33697c80bddd",
+            "value": " 650k/650k [00:00&lt;00:00, 2.00MB/s]"
+          }
+        },
+        "fb0aec799c1a40f9a9b148921a6421fa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_dd3a3867381b494aa04e301c66c102d1",
+            "placeholder": "​",
+            "style": "IPY_MODEL_20a5a2d7eb7c4d9bbc04a72188facdd4",
+            "value": " 3/3 [00:01&lt;00:00,  2.51ba/s]"
+          }
+        },
+        "fc41d9d6971b4f7cafa4247a82d59dec": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "fd3dcf7c89cf4f4aae7095ec921ad53a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_f8be2158142447f19bba5889357abc1c",
+              "IPY_MODEL_9fb5588c5f7d418691fc5ce815d42383",
+              "IPY_MODEL_aa9e5107279d46fca40c992b03102c52"
+            ],
+            "layout": "IPY_MODEL_6dd5334fcc8648ed8a72164899335a13"
+          }
+        },
+        "fe87dbdb92dc43f7963f9b9a87a08df0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f3ce70515e5b4f648e0472aa0cce5bf3",
+            "placeholder": "​",
+            "style": "IPY_MODEL_21de204374fe47f9872f70b32c0e174c",
+            "value": " 2400/2400 [00:00&lt;00:00, 3368.08 examples/s]"
+          }
+        },
+        "ff70ea10be5d40b59bd357afe599e6bf": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_896e188c5b3c45f1b8488abefc79b733",
+            "max": 3355,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_0593582beaa34d4b87282fa14276585c",
+            "value": 3355
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
diff --git a/llm_evals_w_crowdlab/requirements.txt b/llm_evals_w_crowdlab/requirements.txt
new file mode 100644
index 0000000..47faa41
--- /dev/null
+++ b/llm_evals_w_crowdlab/requirements.txt
@@ -0,0 +1,497 @@
+absl-py==1.4.0
+aiohttp==3.9.5
+aiosignal==1.3.1
+alabaster==0.7.16
+albumentations==1.3.1
+altair==4.2.2
+annotated-types==0.7.0
+anyio==3.7.1
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+array_record==0.5.1
+arviz==0.15.1
+astropy==5.3.4
+astunparse==1.6.3
+async-timeout==4.0.3
+atpublic==4.1.0
+attrs==23.2.0
+audioread==3.0.1
+autograd==1.6.2
+Babel==2.15.0
+backcall==0.2.0
+beautifulsoup4==4.12.3
+bidict==0.23.1
+bigframes==1.9.0
+bleach==6.1.0
+blinker==1.4
+blis==0.7.11
+blosc2==2.0.0
+bokeh==3.3.4
+bqplot==0.12.43
+branca==0.7.2
+build==1.2.1
+CacheControl==0.14.0
+cachetools==5.3.3
+catalogue==2.0.10
+certifi==2024.6.2
+cffi==1.16.0
+chardet==5.2.0
+charset-normalizer==3.3.2
+chex==0.1.86
+cleanlab==2.6.6
+click==8.1.7
+click-plugins==1.1.1
+cligj==0.7.2
+cloudpathlib==0.18.1
+cloudpickle==2.2.1
+cmake==3.27.9
+cmdstanpy==1.2.4
+colorcet==3.1.0
+colorlover==0.3.0
+colour==0.1.5
+community==1.0.0b1
+confection==0.1.5
+cons==0.4.6
+contextlib2==21.6.0
+contourpy==1.2.1
+cryptography==42.0.8
+cuda-python==12.2.1
+cudf-cu12 @ https://pypi.nvidia.com/cudf-cu12/cudf_cu12-24.4.1-cp310-cp310-manylinux_2_28_x86_64.whl#sha256=57366e7ef09dc63e0b389aff20df6c37d91e2790065861ee31a4720149f5b694
+cufflinks==0.17.3
+cupy-cuda12x==12.2.0
+cvxopt==1.3.2
+cvxpy==1.3.4
+cycler==0.12.1
+cymem==2.0.8
+Cython==3.0.10
+dask==2023.8.1
+datascience==0.17.6
+datasets==2.20.0
+db-dtypes==1.2.0
+dbus-python==1.2.18
+debugpy==1.6.6
+decorator==4.4.2
+defusedxml==0.7.1
+dill==0.3.8
+distributed==2023.8.1
+distro==1.7.0
+dlib==19.24.4
+dm-tree==0.1.8
+docstring_parser==0.16
+docutils==0.18.1
+dopamine_rl==4.0.9
+duckdb==0.10.3
+earthengine-api==0.1.408
+easydict==1.13
+ecos==2.0.14
+editdistance==0.6.2
+eerepr==0.0.4
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
+entrypoints==0.4
+et-xmlfile==1.1.0
+etils==1.7.0
+etuples==0.3.9
+exceptiongroup==1.2.1
+fastai==2.7.15
+fastcore==1.5.46
+fastdownload==0.0.7
+fastjsonschema==2.20.0
+fastprogress==1.0.3
+fastrlock==0.8.2
+filelock==3.15.3
+fiona==1.9.6
+firebase-admin==5.3.0
+Flask==2.2.5
+flatbuffers==24.3.25
+flax==0.8.4
+folium==0.14.0
+fonttools==4.53.0
+frozendict==2.4.4
+frozenlist==1.4.1
+fsspec==2023.6.0
+future==0.18.3
+gast==0.5.4
+gcsfs==2023.6.0
+GDAL==3.6.4
+gdown==5.1.0
+geemap==0.32.1
+gensim==4.3.2
+geocoder==1.38.1
+geographiclib==2.0
+geopandas==0.13.2
+geopy==2.3.0
+gin-config==0.5.0
+glob2==0.7
+google==2.0.3
+google-ai-generativelanguage==0.6.4
+google-api-core==2.11.1
+google-api-python-client==2.84.0
+google-auth==2.27.0
+google-auth-httplib2==0.1.1
+google-auth-oauthlib==1.2.0
+google-cloud-aiplatform==1.56.0
+google-cloud-bigquery==3.21.0
+google-cloud-bigquery-connection==1.12.1
+google-cloud-bigquery-storage==2.25.0
+google-cloud-core==2.3.3
+google-cloud-datastore==2.15.2
+google-cloud-firestore==2.11.1
+google-cloud-functions==1.13.3
+google-cloud-iam==2.15.0
+google-cloud-language==2.13.3
+google-cloud-resource-manager==1.12.3
+google-cloud-storage==2.8.0
+google-cloud-translate==3.11.3
+google-colab @ file:///colabtools/dist/google-colab-1.0.0.tar.gz#sha256=4b82ee85a233a3034fc34bde479ecda74a4fce0b408178f732d12a0161bc0de8
+google-crc32c==1.5.0
+google-generativeai==0.5.4
+google-pasta==0.2.0
+google-resumable-media==2.7.1
+googleapis-common-protos==1.63.1
+googledrivedownloader==0.4
+graphviz==0.20.3
+greenlet==3.0.3
+grpc-google-iam-v1==0.13.0
+grpcio==1.64.1
+grpcio-status==1.48.2
+gspread==6.0.2
+gspread-dataframe==3.3.1
+gym==0.25.2
+gym-notices==0.0.8
+h11==0.14.0
+h5netcdf==1.3.0
+h5py==3.9.0
+holidays==0.51
+holoviews==1.17.1
+html5lib==1.1
+httpcore==1.0.5
+httpimport==1.3.1
+httplib2==0.22.0
+httpx==0.27.0
+huggingface-hub==0.23.4
+humanize==4.7.0
+hyperopt==0.2.7
+ibis-framework==8.0.0
+idna==3.7
+imageio==2.31.6
+imageio-ffmpeg==0.5.1
+imagesize==1.4.1
+imbalanced-learn==0.10.1
+imgaug==0.4.0
+immutabledict==4.2.0
+importlib_metadata==7.2.0
+importlib_resources==6.4.0
+imutils==0.5.4
+inflect==7.0.0
+iniconfig==2.0.0
+intel-openmp==2023.2.4
+ipyevents==2.0.2
+ipyfilechooser==0.6.0
+ipykernel==5.5.6
+ipyleaflet==0.18.2
+ipyparallel==8.8.0
+ipython==7.34.0
+ipython-genutils==0.2.0
+ipython-sql==0.5.0
+ipytree==0.2.2
+ipywidgets==7.7.1
+itsdangerous==2.2.0
+jax==0.4.26
+jaxlib @ https://storage.googleapis.com/jax-releases/cuda12/jaxlib-0.4.26+cuda12.cudnn89-cp310-cp310-manylinux2014_x86_64.whl#sha256=813cf1fe3e7ca4dbf5327d6e7b4fc8521e92d8bba073ee645ae0d5d036a25750
+jeepney==0.7.1
+jellyfish==1.0.4
+jieba==0.42.1
+Jinja2==3.1.4
+joblib==1.4.2
+jsonpickle==3.2.2
+jsonschema==4.19.2
+jsonschema-specifications==2023.12.1
+jupyter-client==6.1.12
+jupyter-console==6.1.0
+jupyter-server==1.24.0
+jupyter_core==5.7.2
+jupyterlab_pygments==0.3.0
+jupyterlab_widgets==3.0.11
+kaggle==1.6.14
+kagglehub==0.2.5
+keras==2.15.0
+keyring==23.5.0
+kiwisolver==1.4.5
+langcodes==3.4.0
+language_data==1.2.0
+launchpadlib==1.10.16
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
+lazy_loader==0.4
+libclang==18.1.1
+librosa==0.10.2.post1
+lightgbm==4.1.0
+linkify-it-py==2.0.3
+llvmlite==0.41.1
+locket==1.0.0
+logical-unification==0.4.6
+lxml==4.9.4
+malloy==2023.1067
+marisa-trie==1.2.0
+Markdown==3.6
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.7.1
+matplotlib-inline==0.1.7
+matplotlib-venn==0.11.10
+mdit-py-plugins==0.4.1
+mdurl==0.1.2
+miniKanren==1.0.3
+missingno==0.5.2
+mistune==0.8.4
+mizani==0.9.3
+mkl==2023.2.0
+ml-dtypes==0.2.0
+mlxtend==0.22.0
+more-itertools==10.1.0
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multipledispatch==1.0.0
+multiprocess==0.70.16
+multitasking==0.0.11
+murmurhash==1.0.10
+music21==9.1.0
+natsort==8.4.0
+nbclassic==1.1.0
+nbclient==0.10.0
+nbconvert==6.5.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.3
+nibabel==4.0.2
+nltk==3.8.1
+notebook==6.5.5
+notebook_shim==0.2.4
+numba==0.58.1
+numexpr==2.10.1
+numpy==1.25.2
+nvtx==0.2.10
+oauth2client==4.1.3
+oauthlib==3.2.2
+openai==1.35.6
+opencv-contrib-python==4.8.0.76
+opencv-python==4.8.0.76
+opencv-python-headless==4.10.0.84
+openpyxl==3.1.4
+opt-einsum==3.3.0
+optax==0.2.2
+orbax-checkpoint==0.4.4
+osqp==0.6.2.post8
+packaging==24.1
+pandas==2.0.3
+pandas-datareader==0.10.0
+pandas-gbq==0.19.2
+pandas-stubs==2.0.3.230814
+pandocfilters==1.5.1
+panel==1.3.8
+param==2.1.0
+parso==0.8.4
+parsy==2.1
+partd==1.4.2
+pathlib==1.0.1
+patsy==0.5.6
+peewee==3.17.5
+pexpect==4.9.0
+pickleshare==0.7.5
+Pillow==9.4.0
+pip-tools==6.13.0
+platformdirs==4.2.2
+plotly==5.15.0
+plotnine==0.12.4
+pluggy==1.5.0
+polars==0.20.2
+pooch==1.8.2
+portpicker==1.5.2
+prefetch-generator==1.0.3
+preshed==3.0.9
+prettytable==3.10.0
+proglog==0.1.10
+progressbar2==4.2.0
+prometheus_client==0.20.0
+promise==2.3
+prompt_toolkit==3.0.47
+prophet==1.1.5
+proto-plus==1.24.0
+protobuf==3.20.3
+psutil==5.9.5
+psycopg2==2.9.9
+ptyprocess==0.7.0
+py-cpuinfo==9.0.0
+py4j==0.10.9.7
+pyarrow==16.1.0
+pyarrow-hotfix==0.6
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycocotools==2.0.8
+pycparser==2.22
+pydantic==2.7.4
+pydantic_core==2.18.4
+pydata-google-auth==1.8.2
+pydot==1.4.2
+pydot-ng==2.0.0
+pydotplus==2.0.2
+PyDrive==1.3.1
+PyDrive2==1.6.3
+pyerfa==2.0.1.4
+pygame==2.5.2
+Pygments==2.16.1
+PyGObject==3.42.1
+PyJWT==2.3.0
+pymc==5.10.4
+pymystem3==0.2.0
+pynvjitlink-cu12==0.2.4
+PyOpenGL==3.1.7
+pyOpenSSL==24.1.0
+pyparsing==3.1.2
+pyperclip==1.9.0
+pyproj==3.6.1
+pyproject_hooks==1.1.0
+pyshp==2.3.1
+PySocks==1.7.1
+pytensor==2.18.6
+pytest==7.4.4
+python-apt @ file:///backend-container/containers/python_apt-0.0.0-cp310-cp310-linux_x86_64.whl#sha256=b209c7165d6061963abe611492f8c91c3bcef4b7a6600f966bab58900c63fefa
+python-box==7.2.0
+python-dateutil==2.8.2
+python-louvain==0.16
+python-slugify==8.0.4
+python-utils==3.8.2
+pytz==2023.4
+pyviz_comms==3.0.2
+PyWavelets==1.6.0
+PyYAML==6.0.1
+pyzmq==24.0.1
+qdldl==0.1.7.post3
+qudida==0.0.4
+ratelim==0.1.6
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+requests-oauthlib==1.3.1
+requirements-parser==0.9.0
+rich==13.7.1
+rmm-cu12==24.4.0
+rpds-py==0.18.1
+rpy2==3.4.2
+rsa==4.9
+safetensors==0.4.3
+scikit-image==0.19.3
+scikit-learn==1.2.2
+scipy==1.11.4
+scooby==0.10.0
+scs==3.2.4.post3
+seaborn==0.13.1
+SecretStorage==3.3.1
+Send2Trash==1.8.3
+sentencepiece==0.1.99
+shapely==2.0.4
+shellingham==1.5.4
+simple_parsing==0.1.5
+six==1.16.0
+sklearn-pandas==2.2.0
+smart-open==7.0.4
+sniffio==1.3.1
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+spacy==3.7.5
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+Sphinx==5.0.2
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+SQLAlchemy==2.0.31
+sqlglot==20.11.0
+sqlparse==0.5.0
+srsly==2.4.8
+stanio==0.5.0
+statsmodels==0.14.2
+StrEnum==0.4.15
+sympy==1.12.1
+tables==3.8.0
+tabulate==0.9.0
+tbb==2021.13.0
+tblib==3.0.0
+tenacity==8.4.1
+tensorboard==2.15.2
+tensorboard-data-server==0.7.2
+tensorflow @ https://storage.googleapis.com/colab-tf-builds-public-09h6ksrfwbb9g9xv/tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=a2ec79931350b378c1ef300ca836b52a55751acb71a433582508a07f0de57c42
+tensorflow-datasets==4.9.6
+tensorflow-estimator==2.15.0
+tensorflow-gcs-config==2.15.0
+tensorflow-hub==0.16.1
+tensorflow-io-gcs-filesystem==0.37.0
+tensorflow-metadata==1.15.0
+tensorflow-probability==0.23.0
+tensorstore==0.1.45
+termcolor==2.4.0
+terminado==0.18.1
+text-unidecode==1.3
+textblob==0.17.1
+tf-slim==1.1.0
+tf_keras==2.15.1
+thinc==8.2.5
+threadpoolctl==3.5.0
+tifffile==2024.6.18
+tinycss2==1.3.0
+tokenizers==0.19.1
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.1
+torch @ https://download.pytorch.org/whl/cu121/torch-2.3.0%2Bcu121-cp310-cp310-linux_x86_64.whl#sha256=0a12aa9aa6bc442dff8823ac8b48d991fd0771562eaa38593f9c8196d65f7007
+torchaudio @ https://download.pytorch.org/whl/cu121/torchaudio-2.3.0%2Bcu121-cp310-cp310-linux_x86_64.whl#sha256=38b49393f8c322dcaa29d19e5acbf5a0b1978cf1b719445ab670f1fb486e3aa6
+torchsummary==1.5.1
+torchtext==0.18.0
+torchvision @ https://download.pytorch.org/whl/cu121/torchvision-0.18.0%2Bcu121-cp310-cp310-linux_x86_64.whl#sha256=13e1b48dc5ce41ccb8100ab3dd26fdf31d8f1e904ecf2865ac524493013d0df5
+tornado==6.3.3
+tqdm==4.66.4
+traitlets==5.7.1
+traittypes==0.2.1
+transformers==4.41.2
+triton==2.3.0
+tweepy==4.14.0
+typer==0.12.3
+types-pytz==2024.1.0.20240417
+types-setuptools==70.1.0.20240625
+typing_extensions==4.12.2
+tzdata==2024.1
+tzlocal==5.2
+uc-micro-py==1.0.3
+uritemplate==4.1.1
+urllib3==2.0.7
+vega-datasets==0.9.0
+wadllib==1.3.6
+wasabi==1.1.3
+wcwidth==0.2.13
+weasel==0.4.1
+webcolors==24.6.0
+webencodings==0.5.1
+websocket-client==1.8.0
+Werkzeug==3.0.3
+widgetsnbextension==3.6.6
+wordcloud==1.9.3
+wrapt==1.14.1
+xarray==2023.7.0
+xarray-einstats==0.7.0
+xgboost==2.0.3
+xlrd==2.0.1
+xxhash==3.4.1
+xyzservices==2024.6.0
+yarl==1.9.4
+yellowbrick==1.5
+yfinance==0.2.40
+zict==3.0.0
+zipp==3.19.2
\ No newline at end of file

	question_id	model_a	model_b	winner	judge	conversation_a	conversation_b	turn
0	81	alpaca-13b	gpt-3.5-turbo	model_b	author_2	[{'content': 'Compose an engaging travel blog ...	[{'content': 'Compose an engaging travel blog ...	1
1	81	alpaca-13b	gpt-3.5-turbo	model_b	author_2	[{'content': 'Compose an engaging travel blog ...	[{'content': 'Compose an engaging travel blog ...	2
2	81	alpaca-13b	gpt-3.5-turbo	model_b	expert_17	[{'content': 'Compose an engaging travel blog ...	[{'content': 'Compose an engaging travel blog ...	1
3	81	alpaca-13b	gpt-3.5-turbo	model_b	expert_17	[{'content': 'Compose an engaging travel blog ...	[{'content': 'Compose an engaging travel blog ...	2
4	81	alpaca-13b	vicuna-13b-v1.2	model_b	expert_0	[{'content': 'Compose an engaging travel blog ...	[{'content': 'Compose an engaging travel blog ...	1
			winner_binary
		judge	author_0	author_1	author_2	author_3	author_4	author_5	author_6	expert_0	expert_1	expert_10	...	expert_52	expert_53	expert_54	expert_55	expert_56	expert_57	expert_6	expert_7	expert_8	expert_9
question_id	model_a	model_b
81	alpaca-13b	gpt-3.5-turbo	NaN	NaN	1.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
vicuna-13b-v1.2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1.0	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
claude-v1	alpaca-13b	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
llama-13b	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
gpt-3.5-turbo	alpaca-13b	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
		annotator_quality	agreement_with_consensus	worst_class	num_examples_labeled
	judge
winner_binary	author_1	0.467630	0.333333	B	3
expert_5	0.564449	0.562500	B	16
expert_35	0.621383	0.625000	A	16
expert_48	0.665313	0.611111	A	18
expert_20	0.675550	0.700000	B	20
expert_47	0.683031	0.700000	A	20
expert_40	0.699576	0.666667	A	21
expert_53	0.711944	0.764706	A	17
expert_28	0.730157	0.772727	B	22
expert_45	0.765455	0.823529	B	17
		annotator_quality	agreement_with_consensus	worst_class	num_examples_labeled
	judge
winner_binary	expert_30	0.967435	1.000000	0	3
expert_52	0.955679	1.000000	0	1
expert_3	0.952204	1.000000	0	17
expert_39	0.946821	0.947368	0	19
expert_4	0.943187	0.952381	1	21
...	...	...	...	...
expert_48	0.665313	0.611111	0	18
expert_35	0.621383	0.625000	0	16
expert_5	0.564449	0.562500	1	16
author_1	0.467630	0.333333	1	3
expert_33	NaN	0.600000	1	5