Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Aug 26, 2024
1 parent e1fdc83 commit 4fb2f61
Show file tree
Hide file tree
Showing 70 changed files with 30,225 additions and 29,701 deletions.
8 changes: 2 additions & 6 deletions argilla/docs/tutorials/token_classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,7 @@
"source": [
"def predict_gliner(model, text, labels, threshold):\n",
" entities = model.predict_entities(text, labels, threshold)\n",
" return [\n",
" {k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities\n",
" ]"
" return [{k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities]"
]
},
{
Expand All @@ -318,9 +316,7 @@
"data = dataset.records.to_list(flatten=True)\n",
"updated_data = [\n",
" {\n",
" \"span_label\": predict_gliner(\n",
" model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70\n",
" ),\n",
" \"span_label\": predict_gliner(model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70),\n",
" \"id\": sample[\"id\"],\n",
" }\n",
" for sample in data\n",
Expand Down
45 changes: 31 additions & 14 deletions docs/_source/getting_started/quickstart_workflow.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@
"outputs": [],
"source": [
"# Argilla credentials\n",
"api_url = \"http://localhost:6900\" # \"https://<YOUR-HF-SPACE>.hf.space\"\n",
"api_key = DEFAULT_API_KEY # admin.apikey\n",
"api_url = \"http://localhost:6900\" # \"https://<YOUR-HF-SPACE>.hf.space\"\n",
"api_key = DEFAULT_API_KEY # admin.apikey\n",
"# Huggingface credentials\n",
"hf_token = \"hf_...\""
]
Expand Down Expand Up @@ -177,6 +177,7 @@
},
{
"cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"source": [
"### Enable Telemetry\n",
Expand All @@ -187,14 +188,18 @@
{
"cell_type": "code",
"execution_count": null,
"id": "acae54e37e7d407bbb7b55eff062a284",
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" from argilla.utils.telemetry import tutorial_running\n",
"\n",
" tutorial_running()\n",
"except ImportError:\n",
" print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")"
" print(\n",
" \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n",
" )"
]
},
{
Expand Down Expand Up @@ -330,6 +335,7 @@
{
"cell_type": "code",
"execution_count": 36,
"id": "9a63283cbaf04dbcab1f6479b197f3a8",
"metadata": {},
"outputs": [
{
Expand All @@ -350,6 +356,7 @@
},
{
"cell_type": "markdown",
"id": "8dd0d8092fe74a7c96281538738b07e2",
"metadata": {},
"source": [
"As we can see, the dataset has two columns: `text` and `label`. We will use the label as the annotation of our record. Thus, to match the required attributes of a `TextClassificationRecord`, we need to rename the columns."
Expand All @@ -358,6 +365,7 @@
{
"cell_type": "code",
"execution_count": 76,
"id": "72eea5119410473aa328ad9291626812",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -366,6 +374,7 @@
},
{
"cell_type": "markdown",
"id": "8edb47106e1a46a883d545849b8ab81b",
"metadata": {},
"source": [
"Now, we can inspect our dataset."
Expand All @@ -374,6 +383,7 @@
{
"cell_type": "code",
"execution_count": 77,
"id": "10185d26023b46108eb7d9f57d49d2b3",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -439,6 +449,7 @@
},
{
"cell_type": "markdown",
"id": "8763a12b2bbd4a93a75aff182afb95dc",
"metadata": {},
"source": [
"Once we have checked that everything is correct, we can convert it to an Argilla dataset."
Expand All @@ -447,6 +458,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7623eae2785240b9bd12b16a66d81610",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -621,6 +633,7 @@
},
{
"cell_type": "markdown",
"id": "7cdc8c89c7104fffa095e18ddfef8986",
"metadata": {},
"source": [
"As the label is not needed in this case, we will add it as metadata."
Expand All @@ -629,14 +642,16 @@
{
"cell_type": "code",
"execution_count": null,
"id": "b118ea5561624da68c537baed56e602f",
"metadata": {},
"outputs": [],
"source": [
"def metadata_to_dict(row):\n",
" metadata = {}\n",
" metadata[\"label\"] = row[\"label\"]\n",
" row['metadata'] = metadata\n",
" return row\n",
" metadata = {}\n",
" metadata[\"label\"] = row[\"label\"]\n",
" row[\"metadata\"] = metadata\n",
" return row\n",
"\n",
"\n",
"dataset = dataset.map(metadata_to_dict, remove_columns=[\"label\"])"
]
Expand Down Expand Up @@ -692,6 +707,7 @@
"# Load an English spaCy model to tokenize our text\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"\n",
"# Define our tokenize function\n",
"def tokenize(row):\n",
" tokens = [token.text for token in nlp(row[\"text\"])]\n",
Expand Down Expand Up @@ -915,7 +931,11 @@
"from datasets import load_dataset\n",
"\n",
"# Load the Dataset from the Hugging Face Hub and extract a subset of the train split as example\n",
"dataset = load_dataset(\"europa_ecdc_tm\", \"en2fr\", split=\"train\").shuffle(seed=30).select(range(100))"
"dataset = (\n",
" load_dataset(\"europa_ecdc_tm\", \"en2fr\", split=\"train\")\n",
" .shuffle(seed=30)\n",
" .select(range(100))\n",
")"
]
},
{
Expand Down Expand Up @@ -999,11 +1019,11 @@
"source": [
"# Define our helper extract function\n",
"def extract(row):\n",
" return {\"text\": row[\"translation\"][\"en\"], \"prediction\":[row[\"translation\"][\"fr\"]]}\n",
" return {\"text\": row[\"translation\"][\"en\"], \"prediction\": [row[\"translation\"][\"fr\"]]}\n",
"\n",
"\n",
"# Map the extract function to our dataset\n",
"dataset = dataset.map(extract, remove_columns = [\"translation\"])"
"dataset = dataset.map(extract, remove_columns=[\"translation\"])"
]
},
{
Expand Down Expand Up @@ -1463,10 +1483,7 @@
"sentence = \"I love this film, but the new remake is terrible.\"\n",
"\n",
"trainer = ArgillaTrainer(\n",
" name=\"imdb\",\n",
" workspace=\"argilla\",\n",
" framework=\"spacy\",\n",
" train_size=0.8\n",
" name=\"imdb\", workspace=\"argilla\", framework=\"spacy\", train_size=0.8\n",
")\n",
"trainer.update_config(max_epochs=1, max_steps=1)\n",
"trainer.train(output_dir=\"my_easy_model\")\n",
Expand Down
22 changes: 13 additions & 9 deletions docs/_source/getting_started/quickstart_workflow_feedback.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@
],
"source": [
"import argilla as rg\n",
"\n",
"rg.init(api_url=api_url, api_key=api_key)\n",
"\n",
"# # If you want to use your private HF Space\n",
Expand All @@ -150,6 +151,7 @@
},
{
"cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"source": [
"### Enable Telemetry\n",
Expand All @@ -160,14 +162,18 @@
{
"cell_type": "code",
"execution_count": null,
"id": "acae54e37e7d407bbb7b55eff062a284",
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" from argilla.utils.telemetry import tutorial_running\n",
"\n",
" tutorial_running()\n",
"except ImportError:\n",
" print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")"
" print(\n",
" \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n",
" )"
]
},
{
Expand Down Expand Up @@ -292,7 +298,7 @@
" fields={\n",
" \"text\": \"I feel sad today\",\n",
" },\n",
" )\n",
" ),\n",
"]\n",
"dataset.add_records(records)"
]
Expand Down Expand Up @@ -349,7 +355,9 @@
"outputs": [],
"source": [
"# Besides Argilla, it can also be imported with load_dataset from datasets\n",
"dataset_hf = rg.FeedbackDataset.from_huggingface(\"argilla/emotion\", split=\"train[1:101]\")"
"dataset_hf = rg.FeedbackDataset.from_huggingface(\n",
" \"argilla/emotion\", split=\"train[1:101]\"\n",
")"
]
},
{
Expand Down Expand Up @@ -396,8 +404,7 @@
"from argilla.feedback import TrainingTask\n",
"\n",
"task = TrainingTask.for_text_classification(\n",
" text=dataset_hf.field_by_name(\"text\"),\n",
" label=dataset_hf.question_by_name(\"label\")\n",
" text=dataset_hf.field_by_name(\"text\"), label=dataset_hf.question_by_name(\"label\")\n",
")"
]
},
Expand All @@ -421,10 +428,7 @@
"from argilla.feedback import ArgillaTrainer\n",
"\n",
"trainer = ArgillaTrainer(\n",
" dataset=dataset_hf,\n",
" task=task,\n",
" framework=\"setfit\",\n",
" train_size=0.8\n",
" dataset=dataset_hf, task=task, framework=\"setfit\", train_size=0.8\n",
")"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@
"outputs": [],
"source": [
"import argilla as rg\n",
"from argilla.client.feedback.integrations.textdescriptives import TextDescriptivesExtractor\n",
"from argilla.client.feedback.integrations.textdescriptives import (\n",
" TextDescriptivesExtractor,\n",
")\n",
"\n",
"from datasets import load_dataset"
]
Expand All @@ -119,11 +121,7 @@
"# Replace api_url with your HF Spaces URL if using Spaces\n",
"# Replace api_key if you configured a custom API key\n",
"# Replace workspace with the name of your workspace\n",
"rg.init(\n",
" api_url=\"http://localhost:6900\", \n",
" api_key=\"owner.apikey\",\n",
" workspace=\"admin\"\n",
")"
"rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")"
]
},
{
Expand All @@ -146,7 +144,7 @@
"# # Replace api_url with your HF Spaces URL\n",
"# # Replace api_key if you configured a custom API key\n",
"# rg.init(\n",
"# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n",
"# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n",
"# api_key=\"admin.apikey\",\n",
"# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n",
"# )"
Expand All @@ -169,9 +167,12 @@
"source": [
"try:\n",
" from argilla.utils.telemetry import tutorial_running\n",
"\n",
" tutorial_running()\n",
"except ImportError:\n",
" print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")"
" print(\n",
" \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n",
" )"
]
},
{
Expand Down Expand Up @@ -333,10 +334,10 @@
"source": [
"# Initialize the TextDescriptivesExtractor\n",
"tde = TextDescriptivesExtractor(\n",
" model = \"en\",\n",
" metrics = None,\n",
" visible_for_annotators = False,\n",
" show_progress = True,\n",
" model=\"en\",\n",
" metrics=None,\n",
" visible_for_annotators=False,\n",
" show_progress=True,\n",
")"
]
},
Expand Down Expand Up @@ -431,10 +432,10 @@
"source": [
"# Initialize the TextDescriptivesExtractor\n",
"tde = TextDescriptivesExtractor(\n",
" model = \"en\",\n",
" metrics = [\"descriptive_stats\", \"readability\"],\n",
" visible_for_annotators = True,\n",
" show_progress = True,\n",
" model=\"en\",\n",
" metrics=[\"descriptive_stats\", \"readability\"],\n",
" visible_for_annotators=True,\n",
" show_progress=True,\n",
")"
]
},
Expand Down
Loading

0 comments on commit 4fb2f61

Please sign in to comment.