[pre-commit.ci] pre-commit autoupdate #5102

Open · wants to merge 2 commits into base: develop
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
@@ -1,23 +1,23 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-yaml
exclude: argilla/mkdocs.yml|examples/deployments/k8s
- id: end-of-file-fixer
exclude_types: [text, jupyter]
- id: trailing-whitespace

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.4.8
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
hooks:
- id: ruff-format

##############################################################################
# argilla specific hooks
##############################################################################
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.4.8
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
hooks:
- id: ruff
files: 'argilla/src/.*\.py$'
@@ -35,7 +35,7 @@ repos:
- argilla/LICENSE_HEADER
- --fuzzy-match-generates-todo
- repo: https://github.com/kynan/nbstripout
rev: 0.7.1
rev: 0.8.1
hooks:
- id: nbstripout
files: '^argilla/.*\.ipynb$'
@@ -52,8 +52,8 @@ repos:
##############################################################################
# argilla-server specific hooks
##############################################################################
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.4.8
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
hooks:
- id: ruff
files: 'argila-server/src/.*\.py$'
@@ -75,7 +75,7 @@
# Helm lint hook
##############################################################################
- repo: https://github.com/gruntwork-io/pre-commit
rev: v0.1.24
rev: v0.1.25
hooks:
- id: helmlint
name: Helm lint
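These `rev:` bumps are the kind produced by pre-commit's autoupdate command against this config. A minimal sketch of reproducing the bump and re-running the hooks locally, assuming `pre-commit` is installed and the commands are run from the repository root:

```python
# Minimal sketch: reproduce the autoupdate and re-run the hooks locally.
# Assumes pre-commit is installed (pip install pre-commit) and that this
# script is run from the repository root, next to .pre-commit-config.yaml.
import subprocess

# Bump every `rev:` in .pre-commit-config.yaml to each hook repo's latest tag.
subprocess.run(["pre-commit", "autoupdate"], check=True)

# Run all hooks against the whole repository to surface any reformatting the
# new hook versions (e.g. ruff v0.9.1) would apply. `pre-commit run` exits
# non-zero when hooks modify files, so check=True is intentionally omitted.
subprocess.run(["pre-commit", "run", "--all-files"])
```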
2 changes: 1 addition & 1 deletion argilla-server/src/argilla_server/_app.py
@@ -298,7 +298,7 @@ def _show_telemetry_warning():
" https://docs.argilla.io/latest/reference/argilla-server/telemetry/\n\n"
"Telemetry is currently enabled. If you want to disable it, you can configure\n"
"the environment variable before relaunching the server:\n\n"
f'{"#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1"}'
f"{'#set HF_HUB_DISABLE_TELEMETRY=1' if os.name == 'nt' else '$>export HF_HUB_DISABLE_TELEMETRY=1'}"
)
_LOGGER.warning(message)

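The only change to `_app.py` is how the nested literals inside the f-string are quoted, presumably by the updated ruff formatter; the rendered warning text is unchanged. A small sketch checking that the two spellings are equivalent:

```python
import os

# Old spelling: single-quoted f-string with double-quoted literals inside.
old = f'{"#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1"}'

# New spelling: double-quoted f-string with single-quoted literals inside.
new = f"{'#set HF_HUB_DISABLE_TELEMETRY=1' if os.name == 'nt' else '$>export HF_HUB_DISABLE_TELEMETRY=1'}"

assert old == new  # only the quoting changed; the emitted message is identical
```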
@@ -38,5 +38,5 @@ def get_provider_by_name(name: str) -> Type["OAuth2ClientProvider"]:
return provider_class
else:
raise NotFoundError(
f"Unsupported provider {name}. " f"Supported providers are {_ALL_SUPPORTED_OAUTH2_PROVIDERS.keys()}"
f"Unsupported provider {name}. Supported providers are {_ALL_SUPPORTED_OAUTH2_PROVIDERS.keys()}"
)
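Most of the remaining Python diffs merge implicitly concatenated string literals into a single literal, which the updated ruff formatter appears to do whenever the joined string fits on one line. The two spellings build the same string; a minimal sketch with hypothetical values (the real code uses the provider name and `_ALL_SUPPORTED_OAUTH2_PROVIDERS`):

```python
name = "example"
providers = {"huggingface": object}

# Two adjacent (implicitly concatenated) f-string literals...
message_old = f"Unsupported provider {name}. " f"Supported providers are {providers.keys()}"

# ...produce exactly the same string as one merged literal.
message_new = f"Unsupported provider {name}. Supported providers are {providers.keys()}"

assert message_old == message_new
```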
2 changes: 1 addition & 1 deletion argilla-v1/src/argilla_v1/client/datasets.py
@@ -1156,7 +1156,7 @@ def _prepare_for_training_with_spacy(self, nlp: "spacy.Language", records: List[
raise ValueError(
"The following annotation does not align with the tokens"
" produced by the provided spacy language model:"
f" {(anno[0], record.text[anno[1]:anno[2]])}, {list(doc)}"
f" {(anno[0], record.text[anno[1] : anno[2]])}, {list(doc)}"
)
else:
entities.append(span)
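The `datasets.py` change only adds spaces around the slice colon inside the f-string, consistent with formatting the colon like a binary operator when the bounds are non-trivial expressions; the selected substring is identical. A minimal sketch with hypothetical values:

```python
# Hypothetical record text and annotation span (label, start, end).
text = "a toad is not a frog"
anno = ("ANIMAL", 2, 6)

# Both spellings select the same substring; only the whitespace differs.
assert text[anno[1]:anno[2]] == text[anno[1] : anno[2]] == "toad"
```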
4 changes: 2 additions & 2 deletions argilla-v1/src/argilla_v1/client/feedback/dataset/helpers.py
@@ -169,7 +169,7 @@ def normalize_records(
new_records.append(record)
else:
raise ValueError(
"Expected `records` to be a list of `dict` or `FeedbackRecord`," f" got type `{type(record)}` instead."
f"Expected `records` to be a list of `dict` or `FeedbackRecord`, got type `{type(record)}` instead."
)
return new_records

@@ -384,7 +384,7 @@ def _validate_record_metadata(record: FeedbackRecord, metadata_schema: typing.Ty
metadata_schema.parse_obj(record.metadata)
except ValidationError as e:
raise ValueError(
f"`FeedbackRecord.metadata` {record.metadata} does not match the expected schema," f" with exception: {e}"
f"`FeedbackRecord.metadata` {record.metadata} does not match the expected schema, with exception: {e}"
) from e


@@ -230,7 +230,7 @@ def __getitem__(self, key: Union[slice, int]) -> Union["FeedbackRecord", List["F
"""
if len(self._records) < 1:
raise RuntimeError(
"In order to get items from `FeedbackDataset` you need to add them first" " with `add_records`."
"In order to get items from `FeedbackDataset` you need to add them first with `add_records`."
)
if isinstance(key, int) and len(self._records) < key:
raise IndexError(f"This dataset contains {len(self)} records, so index {key} is out of range.")
@@ -331,8 +331,7 @@ def delete_vectors_settings(

if not self.vectors_settings:
raise ValueError(
"The current `FeedbackDataset` does not contain any `vectors_settings` defined, so"
" none can be deleted."
"The current `FeedbackDataset` does not contain any `vectors_settings` defined, so none can be deleted."
)

if not all(vector_setting in self._vectors_settings.keys() for vector_setting in vectors_settings):
@@ -89,7 +89,7 @@ def __delete_dataset(client: "httpx.Client", id: UUID) -> None:
datasets_api_v1.delete_dataset(client=client, id=id)
except Exception as e:
raise Exception(
f"Failed while deleting the `FeedbackDataset` with ID '{id}' from Argilla with" f" exception: {e}"
f"Failed while deleting the `FeedbackDataset` with ID '{id}' from Argilla with exception: {e}"
) from e

@staticmethod
@@ -422,7 +422,7 @@ def generate(model_id: str, instruction: str, context: str = "") -> str:
)
return tokenizer.decode(outputs[0])

generate("{self.output_dir.replace('"', '')}", "Is a toad a frog?")"""
generate("{self.output_dir.replace('"', "")}", "Is a toad a frog?")"""
)
elif self.task_type == "for_reward_modeling":
return predict_call + dedent(
2 changes: 1 addition & 1 deletion argilla-v1/src/argilla_v1/client/models.py
@@ -424,7 +424,7 @@ def __init__(
raise AssertionError("Missing fields: At least one of `text` or `tokens` argument must be provided!")

if (data.get("annotation") or data.get("prediction")) and text is None:
raise AssertionError("Missing field `text`: " "char level spans must be provided with a raw text sentence")
raise AssertionError("Missing field `text`: char level spans must be provided with a raw text sentence")

if text is None:
text = " ".join(tokens)
4 changes: 2 additions & 2 deletions argilla-v1/src/argilla_v1/client/sdk/commons/errors.py
@@ -26,7 +26,7 @@ def __init__(self, message: str, response: Any):
self.response = response

def __str__(self):
return f"\nUnexpected response: {self.message}" "\nResponse content:" f"\n{self.response}"
return f"\nUnexpected response: {self.message}\nResponse content:\n{self.response}"


class InputValueError(BaseClientError):
@@ -52,7 +52,7 @@ def __init__(self, **ctx):
self.ctx = ctx

def __str__(self):
return f"Argilla server returned an error with http status: {self.HTTP_STATUS}. " f"Error details: {self.ctx!r}"
return f"Argilla server returned an error with http status: {self.HTTP_STATUS}. Error details: {self.ctx!r}"


class BadRequestApiError(ArApiResponseError):
6 changes: 2 additions & 4 deletions argilla-v1/src/argilla_v1/client/workspaces.py
@@ -120,8 +120,7 @@ def users(self) -> List["UserModel"]:

def __repr__(self) -> str:
return (
f"Workspace(id={self.id}, name={self.name},"
f" inserted_at={self.inserted_at}, updated_at={self.updated_at})"
f"Workspace(id={self.id}, name={self.name}, inserted_at={self.inserted_at}, updated_at={self.updated_at})"
)

@allowed_for_roles(roles=[UserRole.owner])
@@ -330,8 +329,7 @@ def from_id(cls, id: UUID) -> "Workspace":
) from e
except ValidationApiError as e:
raise ValueError(
"The ID you provided is not a valid UUID, so please make sure that the"
" ID you provided is a valid one."
"The ID you provided is not a valid UUID, so please make sure that the ID you provided is a valid one."
) from e
except BaseClientError as e:
raise RuntimeError(f"Error while retrieving workspace with id=`{id}` from Argilla.") from e
@@ -240,8 +240,7 @@ def _make_single_label_records(
pred_for_rec = [(self._weak_labels.labels[idx], prob[idx]) for idx in np.argsort(prob)[::-1]]
else:
raise NotImplementedError(
f"The tie break policy '{tie_break_policy.value}' is not"
f" implemented for {self.__class__.__name__}!"
f"The tie break policy '{tie_break_policy.value}' is not implemented for {self.__class__.__name__}!"
)

records_with_prediction.append(rec.copy(deep=True))
2 changes: 1 addition & 1 deletion argilla-v1/src/argilla_v1/training/autotrain_advanced.py
@@ -211,7 +211,7 @@ def __repr__(self):
formatted_string.append(arg_dict_key)
for idx, item in enumerate(arg_dict_single):
for key, val in item.items():
formatted_string.append(f"\tjob{idx+1}-{key}: {val}")
formatted_string.append(f"\tjob{idx + 1}-{key}: {val}")
return "\n".join(formatted_string)

def train(self, output_dir: str):
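The `idx+1` to `idx + 1` edits here and in `gen_popular_issues.py` below only reformat the expressions inside f-string replacement fields, which newer ruff versions format like ordinary code. A minimal sketch with hypothetical values:

```python
idx = 0
key, val = "learning_rate", 2e-5

# Whitespace inside a replacement field does not change the rendered string.
assert f"\tjob{idx+1}-{key}: {val}" == f"\tjob{idx + 1}-{key}: {val}"
```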
6 changes: 3 additions & 3 deletions argilla-v1/tests/integration/client/test_models.py
@@ -130,21 +130,21 @@ def test_token_classification_with_tokens_and_tags(tokens, tags, annotation):
def test_token_classification_validations():
with pytest.raises(
AssertionError,
match=("Missing fields: " "At least one of `text` or `tokens` argument must be provided!"),
match=("Missing fields: At least one of `text` or `tokens` argument must be provided!"),
):
TokenClassificationRecord()

tokens = ["test", "text"]
annotation = [("test", 0, 4)]
with pytest.raises(
AssertionError,
match=("Missing field `text`: " "char level spans must be provided with a raw text sentence"),
match=("Missing field `text`: char level spans must be provided with a raw text sentence"),
):
TokenClassificationRecord(tokens=tokens, annotation=annotation)

with pytest.raises(
AssertionError,
match=("Missing field `text`: " "char level spans must be provided with a raw text sentence"),
match=("Missing field `text`: char level spans must be provided with a raw text sentence"),
):
TokenClassificationRecord(tokens=tokens, prediction=annotation)

2 changes: 1 addition & 1 deletion argilla-v1/tests/unit/client/sdk/models/conftest.py
@@ -45,7 +45,7 @@ def check_schema_props(client_props: dict, server_props: dict) -> bool:
continue
if name not in server_props:
LOGGER.warning(
f"Client property {name} not found in server properties. " "Make sure your API compatibility"
f"Client property {name} not found in server properties. Make sure your API compatibility"
)
different_props.append(name)
continue
@@ -202,8 +202,7 @@
" \".svg\",\n",
" \".ico\",\n",
" \".json\",\n",
" \".ipynb\", # Erase this line if you want to include notebooks\n",
"\n",
" \".ipynb\", # Erase this line if you want to include notebooks\n",
" ],\n",
" GithubRepositoryReader.FilterType.EXCLUDE,\n",
" ),\n",
@@ -231,9 +230,7 @@
"outputs": [],
"source": [
"# LLM settings\n",
"Settings.llm = OpenAI(\n",
" model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key\n",
")\n",
"Settings.llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key)\n",
"\n",
"# Load the data and create the index\n",
"index = VectorStoreIndex.from_documents(documents)\n",
6 changes: 3 additions & 3 deletions argilla/docs/scripts/gen_popular_issues.py
@@ -116,21 +116,21 @@ def fetch_data_from_github(repository, auth_token):
f.write(" | Rank | Issue | Reactions | Comments |\n")
f.write(" |------|-------|:---------:|:--------:|\n")
for ix, row in engagement_df.iterrows():
f.write(f" | {ix+1} | [{row['Issue']}]({row['URL']}) | 👍 {row['Reactions']} | 💬 {row['Comments']} |\n")
f.write(f" | {ix + 1} | [{row['Issue']}]({row['URL']}) | 👍 {row['Reactions']} | 💬 {row['Comments']} |\n")

f.write('\n=== "Latest issues open by the community"\n\n')
f.write(" | Rank | Issue | Author |\n")
f.write(" |------|-------|:------:|\n")
for ix, row in community_issues_df.iterrows():
state = "🟢" if row["State"] == "open" else "🟣"
f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | by **{row['Author']}** |\n")
f.write(f" | {ix + 1} | {state} [{row['Issue']}]({row['URL']}) | by **{row['Author']}** |\n")

f.write('\n=== "Planned issues for upcoming releases"\n\n')
f.write(" | Rank | Issue | Milestone |\n")
f.write(" |------|-------|:------:|\n")
for ix, row in planned_issues_df.iterrows():
state = "🟢" if row["State"] == "open" else "🟣"
f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | **{row['Milestone']}** |\n")
f.write(f" | {ix + 1} | {state} [{row['Issue']}]({row['URL']}) | **{row['Milestone']}** |\n")

today = datetime.today().date()
f.write(f"\nLast update: {today}\n")