[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
argilla-io · Aug 26, 2024 · 4fb2f61 · 4fb2f61
1 parent e1fdc83
commit 4fb2f61
Show file tree

Hide file tree

Showing 70 changed files with 30,225 additions and 29,701 deletions.
diff --git a/argilla/docs/tutorials/token_classification.ipynb b/argilla/docs/tutorials/token_classification.ipynb
@@ -297,9 +297,7 @@
    "source": [
     "def predict_gliner(model, text, labels, threshold):\n",
     "    entities = model.predict_entities(text, labels, threshold)\n",
-    "    return [\n",
-    "        {k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities\n",
-    "    ]"
+    "    return [{k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities]"
    ]
   },
   {
@@ -318,9 +316,7 @@
     "data = dataset.records.to_list(flatten=True)\n",
     "updated_data = [\n",
     "    {\n",
-    "        \"span_label\": predict_gliner(\n",
-    "            model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70\n",
-    "        ),\n",
+    "        \"span_label\": predict_gliner(model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70),\n",
     "        \"id\": sample[\"id\"],\n",
     "    }\n",
     "    for sample in data\n",

diff --git a/docs/_source/getting_started/quickstart_workflow.ipynb b/docs/_source/getting_started/quickstart_workflow.ipynb
@@ -134,8 +134,8 @@
    "outputs": [],
    "source": [
     "# Argilla credentials\n",
-    "api_url = \"http://localhost:6900\" # \"https://<YOUR-HF-SPACE>.hf.space\"\n",
-    "api_key = DEFAULT_API_KEY # admin.apikey\n",
+    "api_url = \"http://localhost:6900\"  # \"https://<YOUR-HF-SPACE>.hf.space\"\n",
+    "api_key = DEFAULT_API_KEY  # admin.apikey\n",
     "# Hugging Face credentials\n",
     "hf_token = \"hf_...\""
    ]
@@ -177,6 +177,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7fb27b941602401d91542211134fc71a",
    "metadata": {},
    "source": [
     "### Enable Telemetry\n",
@@ -187,14 +188,18 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "acae54e37e7d407bbb7b55eff062a284",
    "metadata": {},
    "outputs": [],
    "source": [
     "try:\n",
     "    from argilla.utils.telemetry import tutorial_running\n",
+    "\n",
     "    tutorial_running()\n",
     "except ImportError:\n",
-    "    print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")"
+    "    print(\n",
+    "        \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n",
+    "    )"
    ]
   },
   {
@@ -330,6 +335,7 @@
   {
    "cell_type": "code",
    "execution_count": 36,
+   "id": "9a63283cbaf04dbcab1f6479b197f3a8",
    "metadata": {},
    "outputs": [
     {
@@ -350,6 +356,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "8dd0d8092fe74a7c96281538738b07e2",
    "metadata": {},
    "source": [
     "As we can see, the dataset has two columns: `text` and `label`. We will use the label as the annotation of our record. Thus, to match the required attributes of a `TextClassificationRecord`, we need to rename the columns."
@@ -358,6 +365,7 @@
   {
    "cell_type": "code",
    "execution_count": 76,
+   "id": "72eea5119410473aa328ad9291626812",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -366,6 +374,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "8edb47106e1a46a883d545849b8ab81b",
    "metadata": {},
    "source": [
     "Now, we can inspect our dataset."
@@ -374,6 +383,7 @@
   {
    "cell_type": "code",
    "execution_count": 77,
+   "id": "10185d26023b46108eb7d9f57d49d2b3",
    "metadata": {},
    "outputs": [
     {
@@ -439,6 +449,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "8763a12b2bbd4a93a75aff182afb95dc",
    "metadata": {},
    "source": [
     "Once we have checked that everything is correct, we can convert it to an Argilla dataset."
@@ -447,6 +458,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "7623eae2785240b9bd12b16a66d81610",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -621,6 +633,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7cdc8c89c7104fffa095e18ddfef8986",
    "metadata": {},
    "source": [
     "As the label is not needed in this case, we will add it as metadata."
@@ -629,14 +642,16 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "b118ea5561624da68c537baed56e602f",
    "metadata": {},
    "outputs": [],
    "source": [
     "def metadata_to_dict(row):\n",
-    "  metadata = {}\n",
-    "  metadata[\"label\"] = row[\"label\"]\n",
-    "  row['metadata'] = metadata\n",
-    "  return row\n",
+    "    metadata = {}\n",
+    "    metadata[\"label\"] = row[\"label\"]\n",
+    "    row[\"metadata\"] = metadata\n",
+    "    return row\n",
+    "\n",
     "\n",
     "dataset = dataset.map(metadata_to_dict, remove_columns=[\"label\"])"
    ]
@@ -692,6 +707,7 @@
     "# Load an English spaCy model to tokenize our text\n",
     "nlp = spacy.load(\"en_core_web_sm\")\n",
     "\n",
+    "\n",
     "# Define our tokenize function\n",
     "def tokenize(row):\n",
     "    tokens = [token.text for token in nlp(row[\"text\"])]\n",
@@ -915,7 +931,11 @@
     "from datasets import load_dataset\n",
     "\n",
     "# Load the Dataset from the Hugging Face Hub and extract a subset of the train split as example\n",
-    "dataset = load_dataset(\"europa_ecdc_tm\", \"en2fr\", split=\"train\").shuffle(seed=30).select(range(100))"
+    "dataset = (\n",
+    "    load_dataset(\"europa_ecdc_tm\", \"en2fr\", split=\"train\")\n",
+    "    .shuffle(seed=30)\n",
+    "    .select(range(100))\n",
+    ")"
    ]
   },
   {
@@ -999,11 +1019,11 @@
    "source": [
     "# Define our helper extract function\n",
     "def extract(row):\n",
-    "    return {\"text\": row[\"translation\"][\"en\"], \"prediction\":[row[\"translation\"][\"fr\"]]}\n",
+    "    return {\"text\": row[\"translation\"][\"en\"], \"prediction\": [row[\"translation\"][\"fr\"]]}\n",
     "\n",
     "\n",
     "# Map the extract function to our dataset\n",
-    "dataset = dataset.map(extract, remove_columns = [\"translation\"])"
+    "dataset = dataset.map(extract, remove_columns=[\"translation\"])"
    ]
   },
   {
@@ -1463,10 +1483,7 @@
     "sentence = \"I love this film, but the new remake is terrible.\"\n",
     "\n",
     "trainer = ArgillaTrainer(\n",
-    "    name=\"imdb\",\n",
-    "    workspace=\"argilla\",\n",
-    "    framework=\"spacy\",\n",
-    "    train_size=0.8\n",
+    "    name=\"imdb\", workspace=\"argilla\", framework=\"spacy\", train_size=0.8\n",
     ")\n",
     "trainer.update_config(max_epochs=1, max_steps=1)\n",
     "trainer.train(output_dir=\"my_easy_model\")\n",

diff --git a/docs/_source/getting_started/quickstart_workflow_feedback.ipynb b/docs/_source/getting_started/quickstart_workflow_feedback.ipynb
@@ -142,6 +142,7 @@
    ],
    "source": [
     "import argilla as rg\n",
+    "\n",
     "rg.init(api_url=api_url, api_key=api_key)\n",
     "\n",
     "# # If you want to use your private HF Space\n",
@@ -150,6 +151,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7fb27b941602401d91542211134fc71a",
    "metadata": {},
    "source": [
     "### Enable Telemetry\n",
@@ -160,14 +162,18 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "acae54e37e7d407bbb7b55eff062a284",
    "metadata": {},
    "outputs": [],
    "source": [
     "try:\n",
     "    from argilla.utils.telemetry import tutorial_running\n",
+    "\n",
     "    tutorial_running()\n",
     "except ImportError:\n",
-    "    print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")"
+    "    print(\n",
+    "        \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n",
+    "    )"
    ]
   },
   {
@@ -292,7 +298,7 @@
     "        fields={\n",
     "            \"text\": \"I feel sad today\",\n",
     "        },\n",
-    "    )\n",
+    "    ),\n",
     "]\n",
     "dataset.add_records(records)"
    ]
@@ -349,7 +355,9 @@
    "outputs": [],
    "source": [
     "# Besides Argilla, it can also be imported with load_dataset from datasets\n",
-    "dataset_hf = rg.FeedbackDataset.from_huggingface(\"argilla/emotion\", split=\"train[1:101]\")"
+    "dataset_hf = rg.FeedbackDataset.from_huggingface(\n",
+    "    \"argilla/emotion\", split=\"train[1:101]\"\n",
+    ")"
    ]
   },
   {
@@ -396,8 +404,7 @@
     "from argilla.feedback import TrainingTask\n",
     "\n",
     "task = TrainingTask.for_text_classification(\n",
-    "    text=dataset_hf.field_by_name(\"text\"),\n",
-    "    label=dataset_hf.question_by_name(\"label\")\n",
+    "    text=dataset_hf.field_by_name(\"text\"), label=dataset_hf.question_by_name(\"label\")\n",
     ")"
    ]
   },
@@ -421,10 +428,7 @@
     "from argilla.feedback import ArgillaTrainer\n",
     "\n",
     "trainer = ArgillaTrainer(\n",
-    "    dataset=dataset_hf,\n",
-    "    task=task,\n",
-    "    framework=\"setfit\",\n",
-    "    train_size=0.8\n",
+    "    dataset=dataset_hf, task=task, framework=\"setfit\", train_size=0.8\n",
     ")"
    ]
   },

diff --git a/docs/_source/practical_guides/annotation_workflows/add_text_descriptives_as_metadata.ipynb b/docs/_source/practical_guides/annotation_workflows/add_text_descriptives_as_metadata.ipynb
@@ -98,7 +98,9 @@
    "outputs": [],
    "source": [
     "import argilla as rg\n",
-    "from argilla.client.feedback.integrations.textdescriptives import TextDescriptivesExtractor\n",
+    "from argilla.client.feedback.integrations.textdescriptives import (\n",
+    "    TextDescriptivesExtractor,\n",
+    ")\n",
     "\n",
     "from datasets import load_dataset"
    ]
@@ -119,11 +121,7 @@
     "# Replace api_url with the url to your HF Spaces URL if using Spaces\n",
     "# Replace api_key if you configured a custom API key\n",
     "# Replace workspace with the name of your workspace\n",
-    "rg.init(\n",
-    "    api_url=\"http://localhost:6900\", \n",
-    "    api_key=\"owner.apikey\",\n",
-    "    workspace=\"admin\"\n",
-    ")"
+    "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")"
    ]
   },
   {
@@ -146,7 +144,7 @@
     "# # Replace api_url with the url to your HF Spaces URL\n",
     "# # Replace api_key if you configured a custom API key\n",
     "# rg.init(\n",
-    "#     api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n",
+    "#     api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n",
     "#     api_key=\"admin.apikey\",\n",
     "#     extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n",
     "# )"
@@ -169,9 +167,12 @@
    "source": [
     "try:\n",
     "    from argilla.utils.telemetry import tutorial_running\n",
+    "\n",
     "    tutorial_running()\n",
     "except ImportError:\n",
-    "    print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")"
+    "    print(\n",
+    "        \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n",
+    "    )"
    ]
   },
   {
@@ -333,10 +334,10 @@
    "source": [
     "# Initialize the TextDescriptivesExtractor\n",
     "tde = TextDescriptivesExtractor(\n",
-    "    model = \"en\",\n",
-    "    metrics = None,\n",
-    "    visible_for_annotators = False,\n",
-    "    show_progress = True,\n",
+    "    model=\"en\",\n",
+    "    metrics=None,\n",
+    "    visible_for_annotators=False,\n",
+    "    show_progress=True,\n",
     ")"
    ]
   },
@@ -431,10 +432,10 @@
    "source": [
     "# Initialize the TextDescriptivesExtractor\n",
     "tde = TextDescriptivesExtractor(\n",
-    "    model = \"en\",\n",
-    "    metrics = [\"descriptive_stats\", \"readability\"],\n",
-    "    visible_for_annotators = True,\n",
-    "    show_progress = True,\n",
+    "    model=\"en\",\n",
+    "    metrics=[\"descriptive_stats\", \"readability\"],\n",
+    "    visible_for_annotators=True,\n",
+    "    show_progress=True,\n",
     ")"
    ]
   },