From 9ff689ca63a7ba3b0fd42f61a3d5e7536307f782 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:35:01 +0000 Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0) - https://github.com/charliermarsh/ruff-pre-commit → https://github.com/astral-sh/ruff-pre-commit - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.9.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.9.1) - https://github.com/charliermarsh/ruff-pre-commit → https://github.com/astral-sh/ruff-pre-commit - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.9.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.9.1) - [github.com/kynan/nbstripout: 0.7.1 → 0.8.1](https://github.com/kynan/nbstripout/compare/0.7.1...0.8.1) - https://github.com/charliermarsh/ruff-pre-commit → https://github.com/astral-sh/ruff-pre-commit - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.9.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.9.1) - [github.com/gruntwork-io/pre-commit: v0.1.24 → v0.1.25](https://github.com/gruntwork-io/pre-commit/compare/v0.1.24...v0.1.25) --- .pre-commit-config.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fb34c1d8ea..9ff92458b0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-yaml exclude: argilla/mkdocs.yml|examples/deployments/k8s @@ -8,16 +8,16 @@ repos: exclude_types: [text, jupyter] - id: trailing-whitespace - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.8 + - repo: 
https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.1 hooks: - id: ruff-format ############################################################################## # argilla specific hooks ############################################################################## - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.8 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.1 hooks: - id: ruff files: 'argilla/src/.*\.py$' @@ -35,7 +35,7 @@ repos: - argilla/LICENSE_HEADER - --fuzzy-match-generates-todo - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.1 hooks: - id: nbstripout files: '^argilla/.*\.ipynb$' @@ -52,8 +52,8 @@ repos: ############################################################################## # argilla-server specific hooks ############################################################################## - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.8 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.1 hooks: - id: ruff files: 'argila-server/src/.*\.py$' @@ -75,7 +75,7 @@ repos: # Helm lint hook ############################################################################## - repo: https://github.com/gruntwork-io/pre-commit - rev: v0.1.24 + rev: v0.1.25 hooks: - id: helmlint name: Helm lint From b39b9bfac944424c23b6cbc0acae2ff41526acfc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:35:27 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- argilla-server/src/argilla_server/_app.py | 2 +- .../oauth2/providers/__init__.py | 2 +- argilla-v1/src/argilla_v1/client/datasets.py | 2 +- .../client/feedback/dataset/helpers.py | 4 +- .../client/feedback/dataset/local/dataset.py | 5 +- .../client/feedback/dataset/local/mixins.py | 2 +- .../huggingface/model_card/model_card.py | 2 +- 
argilla-v1/src/argilla_v1/client/models.py | 2 +- .../argilla_v1/client/sdk/commons/errors.py | 4 +- .../src/argilla_v1/client/workspaces.py | 6 +- .../text_classification/label_models.py | 3 +- .../argilla_v1/training/autotrain_advanced.py | 2 +- .../tests/integration/client/test_models.py | 6 +- .../tests/unit/client/sdk/models/conftest.py | 2 +- .../integrations/llamaindex_rag_github.ipynb | 7 +- argilla/docs/scripts/gen_popular_issues.py | 6 +- .../docs/tutorials/image_classification.ipynb | 53 +- argilla/docs/tutorials/image_preference.ipynb | 34 +- .../docs/tutorials/token_classification.ipynb | 8 +- argilla/src/argilla/records/_io/_datasets.py | 3 +- .../getting_started/quickstart_workflow.ipynb | 45 +- .../quickstart_workflow_feedback.ipynb | 22 +- .../add_text_descriptives_as_metadata.ipynb | 33 +- .../weak_supervision.ipynb | 67 +- ...ploying-text2text-dvc-explainability.ipynb | 28 +- ...tclassification-colab-activelearning.ipynb | 85 +- ...ying-texttokenclassification-fastapi.ipynb | 40 +- .../notebooks/labelling-spacy-llm.ipynb | 72 +- ...t2text-disaggregators-explainability.ipynb | 18 +- ...ling-textclassification-gpt3-fewshot.ipynb | 39 +- ...ation-sentence-transformers-semantic.ipynb | 30 +- ...cation-sentencetransformers-semantic.ipynb | 20609 ++++++++-------- ...sentencetransformers-weaksupervision.ipynb | 52 +- ...g-textclassification-setfit-zeroshot.ipynb | 57 +- ...assification-sklearn-weaksupervision.ipynb | 38 +- ...assification-snorkel-weaksupervision.ipynb | 22 +- ...labelling-tokenclassification-basics.ipynb | 36 +- ...ing-tokenclassification-deletelabels.ipynb | 29 +- ...ng-tokenclassification-flair-fewshot.ipynb | 18 +- ...lassification-skweak-weaksupervision.ipynb | 28 +- ...tokenclassification-spacy-pretrained.ipynb | 23 +- ...assification-cleanlab-explainability.ipynb | 25 +- ...classification-setfit-explainability.ipynb | 4721 ++-- ...transformersinterpret-explainability.ipynb | 42 +- ...fication-transformers-explainability.ipynb | 26 
+- .../ner_fine_tune_bert_beginners.ipynb | 9992 ++++---- ...-classyclassification-activelearning.ipynb | 27 +- ...tclassification-modal-activelearning.ipynb | 74 +- ...ng-textclassification-setfit-fewshot.ipynb | 19 +- ...-textclassification-setfit-sentiment.ipynb | 17 +- ...ssification-smalltext-activelearning.ipynb | 42 +- ...assification-transformers-pretrained.ipynb | 42 +- ...training-textgeneration-unstructured.ipynb | 60 +- ...e_transformers_embeddings_as_vectors.ipynb | 33 +- .../add_text_descriptives_as_metadata.ipynb | 33 +- .../integrations/llama_index.ipynb | 13 +- .../monitor_endpoints with_fastapi.ipynb | 33 +- .../process_documents_with_unstructured.ipynb | 50 +- ...curating-feedback-instructiondataset.ipynb | 99 +- .../end2end_examples/add-metadata-003.ipynb | 51 +- .../add-suggestions-and-responses-005.ipynb | 36 +- .../end2end_examples/add-vectors-004.ipynb | 36 +- .../end2end_examples/assign-records-002.ipynb | 70 +- .../configure-users-and-workspaces-000.ipynb | 18 +- .../end2end_examples/create-dataset-001.ipynb | 2295 +- .../filter-and-query-008.ipynb | 71 +- .../end2end_examples/train-model-006.ipynb | 23 +- .../end2end_examples/use-metrics-007.ipynb | 27 +- .../fine-tuning-openai-rag-feedback.ipynb | 1624 +- .../fine-tuning-sentencesimilarity-rag.ipynb | 101 +- ...belling-feedback-langchain-syntethic.ipynb | 5625 ++--- .../feedback/labelling-feedback-setfit.ipynb | 5860 +++-- .../feedback/labelling-spacy-llm.ipynb | 72 +- .../feedback/making-most-of-markdown.ipynb | 200 +- .../feedback/monitoring-bias-ethics-dpo.ipynb | 178 +- .../monitoring-data-model-drift.ipynb | 191 +- .../feedback/train-reward-model-rlhf.ipynb | 6048 ++--- .../feedback/trainer-feedback-setfit.ipynb | 44 +- .../feedback/training-fastfit-agreement.ipynb | 86 +- .../feedback/training-llm-mistral-sft.ipynb | 94 +- .../feedback/training-qa-transformers.ipynb | 57 +- .../feedback/training-setfit-absa.ipynb | 139 +- .../few_shot_learning_with_setfit.ipynb | 19 +- 
..._classification_with_active_learning.ipynb | 69 +- .../label_data_with_semantic_search.ipynb | 30 +- .../label_errors_cleanlab.ipynb | 25 +- .../other_datasets/weak_supervision_ner.ipynb | 28 +- ..._text_classification_semantic_search.ipynb | 53 +- docs/template.ipynb | 11 +- examples/custom_field/custom_field.ipynb | 16 +- 90 files changed, 30310 insertions(+), 29781 deletions(-) diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index 3c134471b9..956f288309 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -298,7 +298,7 @@ def _show_telemetry_warning(): " https://docs.argilla.io/latest/reference/argilla-server/telemetry/\n\n" "Telemetry is currently enabled. If you want to disable it, you can configure\n" "the environment variable before relaunching the server:\n\n" - f'{"#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1"}' + f"{'#set HF_HUB_DISABLE_TELEMETRY=1' if os.name == 'nt' else '$>export HF_HUB_DISABLE_TELEMETRY=1'}" ) _LOGGER.warning(message) diff --git a/argilla-server/src/argilla_server/security/authentication/oauth2/providers/__init__.py b/argilla-server/src/argilla_server/security/authentication/oauth2/providers/__init__.py index 0950bc30d2..c611938db7 100644 --- a/argilla-server/src/argilla_server/security/authentication/oauth2/providers/__init__.py +++ b/argilla-server/src/argilla_server/security/authentication/oauth2/providers/__init__.py @@ -38,5 +38,5 @@ def get_provider_by_name(name: str) -> Type["OAuth2ClientProvider"]: return provider_class else: raise NotFoundError( - f"Unsupported provider {name}. " f"Supported providers are {_ALL_SUPPORTED_OAUTH2_PROVIDERS.keys()}" + f"Unsupported provider {name}. 
Supported providers are {_ALL_SUPPORTED_OAUTH2_PROVIDERS.keys()}" ) diff --git a/argilla-v1/src/argilla_v1/client/datasets.py b/argilla-v1/src/argilla_v1/client/datasets.py index 53c758a6b2..f12fbf69f4 100644 --- a/argilla-v1/src/argilla_v1/client/datasets.py +++ b/argilla-v1/src/argilla_v1/client/datasets.py @@ -1156,7 +1156,7 @@ def _prepare_for_training_with_spacy(self, nlp: "spacy.Language", records: List[ raise ValueError( "The following annotation does not align with the tokens" " produced by the provided spacy language model:" - f" {(anno[0], record.text[anno[1]:anno[2]])}, {list(doc)}" + f" {(anno[0], record.text[anno[1] : anno[2]])}, {list(doc)}" ) else: entities.append(span) diff --git a/argilla-v1/src/argilla_v1/client/feedback/dataset/helpers.py b/argilla-v1/src/argilla_v1/client/feedback/dataset/helpers.py index 4580bb3ccc..782cf3783c 100644 --- a/argilla-v1/src/argilla_v1/client/feedback/dataset/helpers.py +++ b/argilla-v1/src/argilla_v1/client/feedback/dataset/helpers.py @@ -169,7 +169,7 @@ def normalize_records( new_records.append(record) else: raise ValueError( - "Expected `records` to be a list of `dict` or `FeedbackRecord`," f" got type `{type(record)}` instead." + f"Expected `records` to be a list of `dict` or `FeedbackRecord`, got type `{type(record)}` instead." 
) return new_records @@ -384,7 +384,7 @@ def _validate_record_metadata(record: FeedbackRecord, metadata_schema: typing.Ty metadata_schema.parse_obj(record.metadata) except ValidationError as e: raise ValueError( - f"`FeedbackRecord.metadata` {record.metadata} does not match the expected schema," f" with exception: {e}" + f"`FeedbackRecord.metadata` {record.metadata} does not match the expected schema, with exception: {e}" ) from e diff --git a/argilla-v1/src/argilla_v1/client/feedback/dataset/local/dataset.py b/argilla-v1/src/argilla_v1/client/feedback/dataset/local/dataset.py index a3e4c687b1..f52e5627ec 100644 --- a/argilla-v1/src/argilla_v1/client/feedback/dataset/local/dataset.py +++ b/argilla-v1/src/argilla_v1/client/feedback/dataset/local/dataset.py @@ -230,7 +230,7 @@ def __getitem__(self, key: Union[slice, int]) -> Union["FeedbackRecord", List["F """ if len(self._records) < 1: raise RuntimeError( - "In order to get items from `FeedbackDataset` you need to add them first" " with `add_records`." + "In order to get items from `FeedbackDataset` you need to add them first with `add_records`." ) if isinstance(key, int) and len(self._records) < key: raise IndexError(f"This dataset contains {len(self)} records, so index {key} is out of range.") @@ -331,8 +331,7 @@ def delete_vectors_settings( if not self.vectors_settings: raise ValueError( - "The current `FeedbackDataset` does not contain any `vectors_settings` defined, so" - " none can be deleted." + "The current `FeedbackDataset` does not contain any `vectors_settings` defined, so none can be deleted." 
) if not all(vector_setting in self._vectors_settings.keys() for vector_setting in vectors_settings): diff --git a/argilla-v1/src/argilla_v1/client/feedback/dataset/local/mixins.py b/argilla-v1/src/argilla_v1/client/feedback/dataset/local/mixins.py index 16be41313d..b351896dc6 100644 --- a/argilla-v1/src/argilla_v1/client/feedback/dataset/local/mixins.py +++ b/argilla-v1/src/argilla_v1/client/feedback/dataset/local/mixins.py @@ -89,7 +89,7 @@ def __delete_dataset(client: "httpx.Client", id: UUID) -> None: datasets_api_v1.delete_dataset(client=client, id=id) except Exception as e: raise Exception( - f"Failed while deleting the `FeedbackDataset` with ID '{id}' from Argilla with" f" exception: {e}" + f"Failed while deleting the `FeedbackDataset` with ID '{id}' from Argilla with exception: {e}" ) from e @staticmethod diff --git a/argilla-v1/src/argilla_v1/client/feedback/integrations/huggingface/model_card/model_card.py b/argilla-v1/src/argilla_v1/client/feedback/integrations/huggingface/model_card/model_card.py index 48a26bbc78..db719d46ef 100644 --- a/argilla-v1/src/argilla_v1/client/feedback/integrations/huggingface/model_card/model_card.py +++ b/argilla-v1/src/argilla_v1/client/feedback/integrations/huggingface/model_card/model_card.py @@ -422,7 +422,7 @@ def generate(model_id: str, instruction: str, context: str = "") -> str: ) return tokenizer.decode(outputs[0]) - generate("{self.output_dir.replace('"', '')}", "Is a toad a frog?")""" + generate("{self.output_dir.replace('"', "")}", "Is a toad a frog?")""" ) elif self.task_type == "for_reward_modeling": return predict_call + dedent( diff --git a/argilla-v1/src/argilla_v1/client/models.py b/argilla-v1/src/argilla_v1/client/models.py index 242434784d..7d4cc6e600 100644 --- a/argilla-v1/src/argilla_v1/client/models.py +++ b/argilla-v1/src/argilla_v1/client/models.py @@ -424,7 +424,7 @@ def __init__( raise AssertionError("Missing fields: At least one of `text` or `tokens` argument must be provided!") if 
(data.get("annotation") or data.get("prediction")) and text is None: - raise AssertionError("Missing field `text`: " "char level spans must be provided with a raw text sentence") + raise AssertionError("Missing field `text`: char level spans must be provided with a raw text sentence") if text is None: text = " ".join(tokens) diff --git a/argilla-v1/src/argilla_v1/client/sdk/commons/errors.py b/argilla-v1/src/argilla_v1/client/sdk/commons/errors.py index f901fcce1d..359ddc79f3 100644 --- a/argilla-v1/src/argilla_v1/client/sdk/commons/errors.py +++ b/argilla-v1/src/argilla_v1/client/sdk/commons/errors.py @@ -26,7 +26,7 @@ def __init__(self, message: str, response: Any): self.response = response def __str__(self): - return f"\nUnexpected response: {self.message}" "\nResponse content:" f"\n{self.response}" + return f"\nUnexpected response: {self.message}\nResponse content:\n{self.response}" class InputValueError(BaseClientError): @@ -52,7 +52,7 @@ def __init__(self, **ctx): self.ctx = ctx def __str__(self): - return f"Argilla server returned an error with http status: {self.HTTP_STATUS}. " f"Error details: {self.ctx!r}" + return f"Argilla server returned an error with http status: {self.HTTP_STATUS}. 
Error details: {self.ctx!r}" class BadRequestApiError(ArApiResponseError): diff --git a/argilla-v1/src/argilla_v1/client/workspaces.py b/argilla-v1/src/argilla_v1/client/workspaces.py index 119097452d..35d297528c 100644 --- a/argilla-v1/src/argilla_v1/client/workspaces.py +++ b/argilla-v1/src/argilla_v1/client/workspaces.py @@ -120,8 +120,7 @@ def users(self) -> List["UserModel"]: def __repr__(self) -> str: return ( - f"Workspace(id={self.id}, name={self.name}," - f" inserted_at={self.inserted_at}, updated_at={self.updated_at})" + f"Workspace(id={self.id}, name={self.name}, inserted_at={self.inserted_at}, updated_at={self.updated_at})" ) @allowed_for_roles(roles=[UserRole.owner]) @@ -330,8 +329,7 @@ def from_id(cls, id: UUID) -> "Workspace": ) from e except ValidationApiError as e: raise ValueError( - "The ID you provided is not a valid UUID, so please make sure that the" - " ID you provided is a valid one." + "The ID you provided is not a valid UUID, so please make sure that the ID you provided is a valid one." ) from e except BaseClientError as e: raise RuntimeError(f"Error while retrieving workspace with id=`{id}` from Argilla.") from e diff --git a/argilla-v1/src/argilla_v1/labeling/text_classification/label_models.py b/argilla-v1/src/argilla_v1/labeling/text_classification/label_models.py index 57ecb0cde8..a1ef315616 100644 --- a/argilla-v1/src/argilla_v1/labeling/text_classification/label_models.py +++ b/argilla-v1/src/argilla_v1/labeling/text_classification/label_models.py @@ -240,8 +240,7 @@ def _make_single_label_records( pred_for_rec = [(self._weak_labels.labels[idx], prob[idx]) for idx in np.argsort(prob)[::-1]] else: raise NotImplementedError( - f"The tie break policy '{tie_break_policy.value}' is not" - f" implemented for {self.__class__.__name__}!" + f"The tie break policy '{tie_break_policy.value}' is not implemented for {self.__class__.__name__}!" 
) records_with_prediction.append(rec.copy(deep=True)) diff --git a/argilla-v1/src/argilla_v1/training/autotrain_advanced.py b/argilla-v1/src/argilla_v1/training/autotrain_advanced.py index b291ead6c5..04edde074e 100644 --- a/argilla-v1/src/argilla_v1/training/autotrain_advanced.py +++ b/argilla-v1/src/argilla_v1/training/autotrain_advanced.py @@ -211,7 +211,7 @@ def __repr__(self): formatted_string.append(arg_dict_key) for idx, item in enumerate(arg_dict_single): for key, val in item.items(): - formatted_string.append(f"\tjob{idx+1}-{key}: {val}") + formatted_string.append(f"\tjob{idx + 1}-{key}: {val}") return "\n".join(formatted_string) def train(self, output_dir: str): diff --git a/argilla-v1/tests/integration/client/test_models.py b/argilla-v1/tests/integration/client/test_models.py index 59451c0940..9d3a595732 100644 --- a/argilla-v1/tests/integration/client/test_models.py +++ b/argilla-v1/tests/integration/client/test_models.py @@ -130,7 +130,7 @@ def test_token_classification_with_tokens_and_tags(tokens, tags, annotation): def test_token_classification_validations(): with pytest.raises( AssertionError, - match=("Missing fields: " "At least one of `text` or `tokens` argument must be provided!"), + match=("Missing fields: At least one of `text` or `tokens` argument must be provided!"), ): TokenClassificationRecord() @@ -138,13 +138,13 @@ def test_token_classification_validations(): annotation = [("test", 0, 4)] with pytest.raises( AssertionError, - match=("Missing field `text`: " "char level spans must be provided with a raw text sentence"), + match=("Missing field `text`: char level spans must be provided with a raw text sentence"), ): TokenClassificationRecord(tokens=tokens, annotation=annotation) with pytest.raises( AssertionError, - match=("Missing field `text`: " "char level spans must be provided with a raw text sentence"), + match=("Missing field `text`: char level spans must be provided with a raw text sentence"), ): 
TokenClassificationRecord(tokens=tokens, prediction=annotation) diff --git a/argilla-v1/tests/unit/client/sdk/models/conftest.py b/argilla-v1/tests/unit/client/sdk/models/conftest.py index 560b78c42c..b1ec76d6b1 100644 --- a/argilla-v1/tests/unit/client/sdk/models/conftest.py +++ b/argilla-v1/tests/unit/client/sdk/models/conftest.py @@ -45,7 +45,7 @@ def check_schema_props(client_props: dict, server_props: dict) -> bool: continue if name not in server_props: LOGGER.warning( - f"Client property {name} not found in server properties. " "Make sure your API compatibility" + f"Client property {name} not found in server properties. Make sure your API compatibility" ) different_props.append(name) continue diff --git a/argilla/docs/community/integrations/llamaindex_rag_github.ipynb b/argilla/docs/community/integrations/llamaindex_rag_github.ipynb index a19627f19a..8fd3c5c521 100644 --- a/argilla/docs/community/integrations/llamaindex_rag_github.ipynb +++ b/argilla/docs/community/integrations/llamaindex_rag_github.ipynb @@ -202,8 +202,7 @@ " \".svg\",\n", " \".ico\",\n", " \".json\",\n", - " \".ipynb\", # Erase this line if you want to include notebooks\n", - "\n", + " \".ipynb\", # Erase this line if you want to include notebooks\n", " ],\n", " GithubRepositoryReader.FilterType.EXCLUDE,\n", " ),\n", @@ -231,9 +230,7 @@ "outputs": [], "source": [ "# LLM settings\n", - "Settings.llm = OpenAI(\n", - " model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key\n", - ")\n", + "Settings.llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key)\n", "\n", "# Load the data and create the index\n", "index = VectorStoreIndex.from_documents(documents)\n", diff --git a/argilla/docs/scripts/gen_popular_issues.py b/argilla/docs/scripts/gen_popular_issues.py index d74bdeac1c..f6574fff11 100644 --- a/argilla/docs/scripts/gen_popular_issues.py +++ b/argilla/docs/scripts/gen_popular_issues.py @@ -116,21 +116,21 @@ def 
fetch_data_from_github(repository, auth_token): f.write(" | Rank | Issue | Reactions | Comments |\n") f.write(" |------|-------|:---------:|:--------:|\n") for ix, row in engagement_df.iterrows(): - f.write(f" | {ix+1} | [{row['Issue']}]({row['URL']}) | 👍 {row['Reactions']} | 💬 {row['Comments']} |\n") + f.write(f" | {ix + 1} | [{row['Issue']}]({row['URL']}) | 👍 {row['Reactions']} | 💬 {row['Comments']} |\n") f.write('\n=== "Latest issues open by the community"\n\n') f.write(" | Rank | Issue | Author |\n") f.write(" |------|-------|:------:|\n") for ix, row in community_issues_df.iterrows(): state = "🟢" if row["State"] == "open" else "🟣" - f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | by **{row['Author']}** |\n") + f.write(f" | {ix + 1} | {state} [{row['Issue']}]({row['URL']}) | by **{row['Author']}** |\n") f.write('\n=== "Planned issues for upcoming releases"\n\n') f.write(" | Rank | Issue | Milestone |\n") f.write(" |------|-------|:------:|\n") for ix, row in planned_issues_df.iterrows(): state = "🟢" if row["State"] == "open" else "🟣" - f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | **{row['Milestone']}** |\n") + f.write(f" | {ix + 1} | {state} [{row['Issue']}]({row['URL']}) | **{row['Milestone']}** |\n") today = datetime.today().date() f.write(f"\nLast update: {today}\n") diff --git a/argilla/docs/tutorials/image_classification.ipynb b/argilla/docs/tutorials/image_classification.ipynb index d49b3fcf4c..46cdd909b0 100644 --- a/argilla/docs/tutorials/image_classification.ipynb +++ b/argilla/docs/tutorials/image_classification.ipynb @@ -93,13 +93,7 @@ "from PIL import Image\n", "\n", "from datasets import load_dataset, Dataset, load_metric\n", - "from transformers import (\n", - " AutoImageProcessor,\n", - " AutoModelForImageClassification,\n", - " pipeline,\n", - " Trainer,\n", - " TrainingArguments\n", - ")\n", + "from transformers import AutoImageProcessor, AutoModelForImageClassification, pipeline, Trainer, TrainingArguments\n", 
"\n", "import argilla as rg" ] @@ -182,7 +176,7 @@ " title=\"What digit do you see on the image?\",\n", " labels=labels,\n", " )\n", - " ]\n", + " ],\n", ")" ] }, @@ -246,7 +240,7 @@ "n_rows = 100\n", "\n", "hf_dataset = load_dataset(\"ylecun/mnist\", streaming=True)\n", - "dataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\n", + "dataset_rows = [row for _, row in zip(range(n_rows), hf_dataset[\"train\"])]\n", "hf_dataset = Dataset.from_list(dataset_rows)\n", "\n", "hf_dataset" @@ -525,7 +519,8 @@ ], "source": [ "def greyscale_to_rgb(img) -> Image:\n", - " return Image.merge('RGB', (img, img, img))\n", + " return Image.merge(\"RGB\", (img, img, img))\n", + "\n", "\n", "submitted_image_rgb = [\n", " {\n", @@ -556,7 +551,7 @@ "\n", "submitted_image_rgb_processed = [\n", " {\n", - " \"pixel_values\": processor(sample[\"image\"], return_tensors='pt')[\"pixel_values\"],\n", + " \"pixel_values\": processor(sample[\"image\"], return_tensors=\"pt\")[\"pixel_values\"],\n", " \"label\": sample[\"label\"],\n", " }\n", " for sample in submitted_image_rgb\n", @@ -624,8 +619,8 @@ "source": [ "def collate_fn(batch):\n", " return {\n", - " 'pixel_values': torch.stack([torch.tensor(x['pixel_values'][0]) for x in batch]),\n", - " 'labels': torch.tensor([int(x['label']) for x in batch])\n", + " \"pixel_values\": torch.stack([torch.tensor(x[\"pixel_values\"][0]) for x in batch]),\n", + " \"labels\": torch.tensor([int(x[\"label\"]) for x in batch]),\n", " }" ] }, @@ -643,6 +638,8 @@ "outputs": [], "source": [ "metric = load_metric(\"accuracy\", trust_remote_code=True)\n", + "\n", + "\n", "def compute_metrics(p):\n", " return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)" ] @@ -664,7 +661,7 @@ " checkpoint,\n", " num_labels=len(labels),\n", " id2label={int(i): int(c) for i, c in enumerate(labels)},\n", - " label2id={int(c): int(i) for i, c in enumerate(labels)}\n", + " label2id={int(c): int(i) for i, c in 
enumerate(labels)},\n", ")\n", "model.config" ] @@ -698,19 +695,19 @@ ], "source": [ "training_args = TrainingArguments(\n", - " output_dir=\"./image-classifier\",\n", - " per_device_train_batch_size=16,\n", - " eval_strategy=\"steps\",\n", - " num_train_epochs=1,\n", - " fp16=False, # True if you have a GPU with mixed precision support\n", - " save_steps=100,\n", - " eval_steps=100,\n", - " logging_steps=10,\n", - " learning_rate=2e-4,\n", - " save_total_limit=2,\n", - " remove_unused_columns=True,\n", - " push_to_hub=False,\n", - " load_best_model_at_end=True,\n", + " output_dir=\"./image-classifier\",\n", + " per_device_train_batch_size=16,\n", + " eval_strategy=\"steps\",\n", + " num_train_epochs=1,\n", + " fp16=False, # True if you have a GPU with mixed precision support\n", + " save_steps=100,\n", + " eval_steps=100,\n", + " logging_steps=10,\n", + " learning_rate=2e-4,\n", + " save_total_limit=2,\n", + " remove_unused_columns=True,\n", + " push_to_hub=False,\n", + " load_best_model_at_end=True,\n", ")\n", "\n", "trainer = Trainer(\n", @@ -745,12 +742,14 @@ "source": [ "pipe = pipeline(\"image-classification\", model=model, image_processor=processor)\n", "\n", + "\n", "def run_inference(batch):\n", " predictions = pipe(batch[\"image\"])\n", " batch[\"image_label\"] = [prediction[0][\"label\"] for prediction in predictions]\n", " batch[\"score\"] = [prediction[0][\"score\"] for prediction in predictions]\n", " return batch\n", "\n", + "\n", "hf_dataset = hf_dataset.map(run_inference, batched=True)" ] }, diff --git a/argilla/docs/tutorials/image_preference.ipynb b/argilla/docs/tutorials/image_preference.ipynb index 1f3aa9c43a..d25d433571 100644 --- a/argilla/docs/tutorials/image_preference.ipynb +++ b/argilla/docs/tutorials/image_preference.ipynb @@ -191,11 +191,10 @@ " metadata=[\n", " rg.FloatMetadataProperty(name=\"toxicity\", title=\"Toxicity score\"),\n", " rg.FloatMetadataProperty(name=\"identity_attack\", title=\"Identity attack score\"),\n", - "\n", " 
],\n", " vectors=[\n", " rg.VectorField(name=\"original_caption_vector\", dimensions=384),\n", - " ]\n", + " ],\n", ")" ] }, @@ -254,7 +253,7 @@ "n_rows = 25\n", "\n", "hf_dataset = load_dataset(\"tomg-group-umd/pixelprose\", streaming=True)\n", - "dataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\n", + "dataset_rows = [row for _, row in zip(range(n_rows), hf_dataset[\"train\"])]\n", "hf_dataset = Dataset.from_list(dataset_rows)\n", "\n", "hf_dataset" @@ -341,8 +340,7 @@ } ], "source": [ - "hf_dataset = hf_dataset.filter(\n", - " lambda x: any([x[\"url\"].endswith(extension) for extension in [\".jpg\", \".png\", \".jpeg\"]]))\n", + "hf_dataset = hf_dataset.filter(lambda x: any([x[\"url\"].endswith(extension) for extension in [\".jpg\", \".png\", \".jpeg\"]]))\n", "\n", "hf_dataset" ] @@ -380,6 +378,7 @@ "API_URL = \"https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell\"\n", "headers = {\"Authorization\": f\"Bearer {os.getenv('HF_TOKEN')}\"}\n", "\n", + "\n", "def query(payload):\n", " response = requests.post(API_URL, headers=headers, json=payload)\n", " if response.status_code == 200:\n", @@ -391,9 +390,8 @@ " image = query(payload)\n", " return image\n", "\n", - "query({\n", - "\t\"inputs\": \"Astronaut riding a horse\"\n", - "})" + "\n", + "query({\"inputs\": \"Astronaut riding a horse\"})" ] }, { @@ -426,9 +424,10 @@ "def generate_image(row):\n", " caption = row[\"original_caption\"]\n", " row[\"image_1\"] = query({\"inputs\": caption})\n", - " row[\"image_2\"] = query({\"inputs\": caption + \" \"}) # space to avoid caching and getting the same image\n", + " row[\"image_2\"] = query({\"inputs\": caption + \" \"}) # space to avoid caching and getting the same image\n", " return row\n", - " \n", + "\n", + "\n", "hf_dataset_with_images = hf_dataset.map(generate_image, batched=False)\n", "\n", "hf_dataset_with_images" @@ -451,11 +450,13 @@ "source": [ "model = SentenceTransformer(\"TaylorAI/bge-micro-v2\")\n", "\n", 
+ "\n", "def encode_questions(batch):\n", " vectors_as_numpy = model.encode(batch[\"original_caption\"])\n", " batch[\"original_caption_vector\"] = [x.tolist() for x in vectors_as_numpy]\n", " return batch\n", "\n", + "\n", "hf_dataset_with_images_vectors = hf_dataset_with_images.map(encode_questions, batched=True)" ] }, @@ -474,11 +475,14 @@ "metadata": {}, "outputs": [], "source": [ - "dataset.records.log(records=hf_dataset_with_images_vectors, mapping={\n", - " \"key\": \"id\",\n", - " \"original_caption\": \"caption\",\n", - " \"url\": \"image_original\",\n", - "})" + "dataset.records.log(\n", + " records=hf_dataset_with_images_vectors,\n", + " mapping={\n", + " \"key\": \"id\",\n", + " \"original_caption\": \"caption\",\n", + " \"url\": \"image_original\",\n", + " },\n", + ")" ] }, { diff --git a/argilla/docs/tutorials/token_classification.ipynb b/argilla/docs/tutorials/token_classification.ipynb index 71f457f69a..79d74c2dfe 100644 --- a/argilla/docs/tutorials/token_classification.ipynb +++ b/argilla/docs/tutorials/token_classification.ipynb @@ -309,9 +309,7 @@ "source": [ "def predict_gliner(model, text, labels, threshold):\n", " entities = model.predict_entities(text, labels, threshold)\n", - " return [\n", - " {k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities\n", - " ]" + " return [{k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities]" ] }, { @@ -330,9 +328,7 @@ "data = dataset.records.to_list(flatten=True)\n", "updated_data = [\n", " {\n", - " \"span_label\": predict_gliner(\n", - " model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70\n", - " ),\n", + " \"span_label\": predict_gliner(model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70),\n", " \"id\": sample[\"id\"],\n", " }\n", " for sample in data\n", diff --git a/argilla/src/argilla/records/_io/_datasets.py b/argilla/src/argilla/records/_io/_datasets.py index 975816cca2..30513f1848 100644 --- 
a/argilla/src/argilla/records/_io/_datasets.py +++ b/argilla/src/argilla/records/_io/_datasets.py @@ -263,8 +263,7 @@ def to_argilla(hf_dataset: "HFDataset", mapper: "IngestedRecordMapper") -> "HFDa if id_column_name not in hf_dataset.column_names: split = hf_dataset.split warnings.warn( - message="Record id column not found in Hugging Face dataset. " - "Using row index and split for record ids.", + message="Record id column not found in Hugging Face dataset. Using row index and split for record ids.", ) hf_dataset = hf_dataset.map( diff --git a/docs/_source/getting_started/quickstart_workflow.ipynb b/docs/_source/getting_started/quickstart_workflow.ipynb index 9d13063ce3..1e84d37b6d 100644 --- a/docs/_source/getting_started/quickstart_workflow.ipynb +++ b/docs/_source/getting_started/quickstart_workflow.ipynb @@ -134,8 +134,8 @@ "outputs": [], "source": [ "# Argilla credentials\n", - "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", - "api_key = DEFAULT_API_KEY # admin.apikey\n", + "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", + "api_key = DEFAULT_API_KEY # admin.apikey\n", "# Huggingface credentials\n", "hf_token = \"hf_...\"" ] @@ -177,6 +177,7 @@ }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -187,14 +188,18 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -330,6 +335,7 @@ { "cell_type": "code", "execution_count": 36, + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "outputs": [ { @@ -350,6 +356,7 @@ }, { "cell_type": "markdown", + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "source": [ "As we can see, the dataset has two columns: `text` and `label`. We will use the label as the annotation of our record. Thus, to match the required attributes of a `TextClassificationRecord`, we need to rename the columns." @@ -358,6 +365,7 @@ { "cell_type": "code", "execution_count": 76, + "id": "72eea5119410473aa328ad9291626812", "metadata": {}, "outputs": [], "source": [ @@ -366,6 +374,7 @@ }, { "cell_type": "markdown", + "id": "8edb47106e1a46a883d545849b8ab81b", "metadata": {}, "source": [ "Now, we can inspect our dataset." @@ -374,6 +383,7 @@ { "cell_type": "code", "execution_count": 77, + "id": "10185d26023b46108eb7d9f57d49d2b3", "metadata": {}, "outputs": [ { @@ -439,6 +449,7 @@ }, { "cell_type": "markdown", + "id": "8763a12b2bbd4a93a75aff182afb95dc", "metadata": {}, "source": [ "Once, we checked that everything is correct, we can convert it to an Argilla dataset." @@ -447,6 +458,7 @@ { "cell_type": "code", "execution_count": null, + "id": "7623eae2785240b9bd12b16a66d81610", "metadata": {}, "outputs": [], "source": [ @@ -621,6 +633,7 @@ }, { "cell_type": "markdown", + "id": "7cdc8c89c7104fffa095e18ddfef8986", "metadata": {}, "source": [ "As the label is not needed in this case, we will add it as metadata." 
@@ -629,14 +642,16 @@ { "cell_type": "code", "execution_count": null, + "id": "b118ea5561624da68c537baed56e602f", "metadata": {}, "outputs": [], "source": [ "def metadata_to_dict(row):\n", - " metadata = {}\n", - " metadata[\"label\"] = row[\"label\"]\n", - " row['metadata'] = metadata\n", - " return row\n", + " metadata = {}\n", + " metadata[\"label\"] = row[\"label\"]\n", + " row[\"metadata\"] = metadata\n", + " return row\n", + "\n", "\n", "dataset = dataset.map(metadata_to_dict, remove_columns=[\"label\"])" ] @@ -692,6 +707,7 @@ "# Load a english spaCy model to tokenize our text\n", "nlp = spacy.load(\"en_core_web_sm\")\n", "\n", + "\n", "# Define our tokenize function\n", "def tokenize(row):\n", " tokens = [token.text for token in nlp(row[\"text\"])]\n", @@ -915,7 +931,11 @@ "from datasets import load_dataset\n", "\n", "# Load the Dataset from the Hugging Face Hub and extract a subset of the train split as example\n", - "dataset = load_dataset(\"europa_ecdc_tm\", \"en2fr\", split=\"train\").shuffle(seed=30).select(range(100))" + "dataset = (\n", + " load_dataset(\"europa_ecdc_tm\", \"en2fr\", split=\"train\")\n", + " .shuffle(seed=30)\n", + " .select(range(100))\n", + ")" ] }, { @@ -999,11 +1019,11 @@ "source": [ "# Define our helper extract function\n", "def extract(row):\n", - " return {\"text\": row[\"translation\"][\"en\"], \"prediction\":[row[\"translation\"][\"fr\"]]}\n", + " return {\"text\": row[\"translation\"][\"en\"], \"prediction\": [row[\"translation\"][\"fr\"]]}\n", "\n", "\n", "# Map the extract function to our dataset\n", - "dataset = dataset.map(extract, remove_columns = [\"translation\"])" + "dataset = dataset.map(extract, remove_columns=[\"translation\"])" ] }, { @@ -1463,10 +1483,7 @@ "sentence = \"I love this film, but the new remake is terrible.\"\n", "\n", "trainer = ArgillaTrainer(\n", - " name=\"imdb\",\n", - " workspace=\"argilla\",\n", - " framework=\"spacy\",\n", - " train_size=0.8\n", + " name=\"imdb\", workspace=\"argilla\", 
framework=\"spacy\", train_size=0.8\n", ")\n", "trainer.update_config(max_epochs=1, max_steps=1)\n", "trainer.train(output_dir=\"my_easy_model\")\n", diff --git a/docs/_source/getting_started/quickstart_workflow_feedback.ipynb b/docs/_source/getting_started/quickstart_workflow_feedback.ipynb index 1fdfcb624a..c26a23b16a 100644 --- a/docs/_source/getting_started/quickstart_workflow_feedback.ipynb +++ b/docs/_source/getting_started/quickstart_workflow_feedback.ipynb @@ -142,6 +142,7 @@ ], "source": [ "import argilla as rg\n", + "\n", "rg.init(api_url=api_url, api_key=api_key)\n", "\n", "# # If you want to use your private HF Space\n", @@ -150,6 +151,7 @@ }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -160,14 +162,18 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -292,7 +298,7 @@ " fields={\n", " \"text\": \"I feel sad today\",\n", " },\n", - " )\n", + " ),\n", "]\n", "dataset.add_records(records)" ] @@ -349,7 +355,9 @@ "outputs": [], "source": [ "# Besides Argilla, it can also be imported with load_dataset from datasets\n", - "dataset_hf = rg.FeedbackDataset.from_huggingface(\"argilla/emotion\", split=\"train[1:101]\")" + "dataset_hf = rg.FeedbackDataset.from_huggingface(\n", + " \"argilla/emotion\", split=\"train[1:101]\"\n", + ")" ] }, { @@ -396,8 +404,7 @@ "from argilla.feedback import TrainingTask\n", "\n", "task = TrainingTask.for_text_classification(\n", - " text=dataset_hf.field_by_name(\"text\"),\n", - " label=dataset_hf.question_by_name(\"label\")\n", + " text=dataset_hf.field_by_name(\"text\"), label=dataset_hf.question_by_name(\"label\")\n", ")" ] }, @@ -421,10 +428,7 @@ "from argilla.feedback import ArgillaTrainer\n", "\n", "trainer = ArgillaTrainer(\n", - " dataset=dataset_hf,\n", - " task=task,\n", - " framework=\"setfit\",\n", - " train_size=0.8\n", + " dataset=dataset_hf, task=task, framework=\"setfit\", train_size=0.8\n", ")" ] }, diff --git a/docs/_source/practical_guides/annotation_workflows/add_text_descriptives_as_metadata.ipynb b/docs/_source/practical_guides/annotation_workflows/add_text_descriptives_as_metadata.ipynb index 3eb9e80312..20ba8c2711 100644 --- a/docs/_source/practical_guides/annotation_workflows/add_text_descriptives_as_metadata.ipynb +++ b/docs/_source/practical_guides/annotation_workflows/add_text_descriptives_as_metadata.ipynb @@ -98,7 +98,9 @@ "outputs": [], "source": [ "import argilla as rg\n", - "from argilla.client.feedback.integrations.textdescriptives import TextDescriptivesExtractor\n", + "from argilla.client.feedback.integrations.textdescriptives import (\n", + " TextDescriptivesExtractor,\n", + ")\n", "\n", "from datasets import load_dataset" ] @@ -119,11 +121,7 @@ "# Replace api_url with the url to your HF Spaces URL if using 
Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -146,7 +144,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -169,9 +167,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -333,10 +334,10 @@ "source": [ "# Initialize the TextDescriptivesExtractor\n", "tde = TextDescriptivesExtractor(\n", - " model = \"en\",\n", - " metrics = None,\n", - " visible_for_annotators = False,\n", - " show_progress = True,\n", + " model=\"en\",\n", + " metrics=None,\n", + " visible_for_annotators=False,\n", + " show_progress=True,\n", ")" ] }, @@ -431,10 +432,10 @@ "source": [ "# Initialize the TextDescriptivesExtractor\n", "tde = TextDescriptivesExtractor(\n", - " model = \"en\",\n", - " metrics = [\"descriptive_stats\", \"readability\"],\n", - " visible_for_annotators = True,\n", - " show_progress = True,\n", + " model=\"en\",\n", + " metrics=[\"descriptive_stats\", \"readability\"],\n", + " visible_for_annotators=True,\n", + " show_progress=True,\n", ")" ] }, diff --git a/docs/_source/practical_guides/annotation_workflows/weak_supervision.ipynb b/docs/_source/practical_guides/annotation_workflows/weak_supervision.ipynb index 0370b6a308..b5a0721e35 100644 --- a/docs/_source/practical_guides/annotation_workflows/weak_supervision.ipynb +++ b/docs/_source/practical_guides/annotation_workflows/weak_supervision.ipynb @@ -274,7 +274,7 @@ "test_df = pd.read_csv(\"../../tutorials/notebooks/data/yt_comments_test.csv\")\n", "\n", "# preview data\n", - "train_df.head()\n" + "train_df.head()" ] }, { @@ -369,7 +369,7 @@ "subscribe = Rule(query=\"subscribe\", label=\"SPAM\")\n", "my = Rule(query=\"my\", label=\"SPAM\")\n", "song = Rule(query=\"song\", label=\"HAM\")\n", - "love = Rule(query=\"love\", label=\"HAM\")\n" + "love = Rule(query=\"love\", label=\"HAM\")" ] }, { @@ -390,6 +390,7 @@ "source": [ "import re\n", "\n", + "\n", "# rules defined as Python labeling functions\n", "def contains_http(record: rg.TextClassificationRecord):\n", " if \"http\" in record.inputs[\"text\"]:\n", @@ -403,7 +404,7 @@ "def regex_check_out(record: rg.TextClassificationRecord):\n", " return (\n", " \"SPAM\" if re.search(r\"check.*out\", 
record.inputs[\"text\"], flags=re.I) else None\n", - " )\n" + " )" ] }, { @@ -422,7 +423,9 @@ "metadata": {}, "outputs": [], "source": [ - "labeling_rules_df = pd.read_csv(\"../../_static/datasets/weak_supervision_tutorial/labeling_rules.csv\")" + "labeling_rules_df = pd.read_csv(\n", + " \"../../_static/datasets/weak_supervision_tutorial/labeling_rules.csv\"\n", + ")" ] }, { @@ -520,9 +523,7 @@ "source": [ "predefined_labeling_rules = []\n", "for index, row in labeling_rules_df.iterrows():\n", - " predefined_labeling_rules.append(\n", - " Rule(row[\"query\"], row[\"label\"])\n", - " )" + " predefined_labeling_rules.append(Rule(row[\"query\"], row[\"label\"]))" ] }, { @@ -544,27 +545,16 @@ "from argilla.labeling.text_classification import load_rules, add_rules, delete_rules\n", "\n", "# bundle our rules in a list\n", - "rules = [\n", - " check_out,\n", - " plz,\n", - " subscribe,\n", - " my,\n", - " song,\n", - " love\n", - "]\n", + "rules = [check_out, plz, subscribe, my, song, love]\n", "\n", - "labeling_functions = [ \n", - " contains_http,\n", - " short_comment,\n", - " regex_check_out\n", - "]\n", + "labeling_functions = [contains_http, short_comment, regex_check_out]\n", "\n", "# add rules to dataset\n", "add_rules(dataset=\"weak_supervision_yt\", rules=rules)\n", "\n", "\n", "# add the predefined rules loaded from external file\n", - "add_rules(dataset=\"weak_supervision_yt\", rules=predefined_labeling_rules)\n" + "add_rules(dataset=\"weak_supervision_yt\", rules=predefined_labeling_rules)" ] }, { @@ -583,7 +573,7 @@ "metadata": {}, "outputs": [], "source": [ - "# load all the rules available in the dataset including interactively defined in the UI \n", + "# load all the rules available in the dataset including interactively defined in the UI\n", "dataset_labeling_rules = load_rules(dataset=\"weak_supervision_yt\")\n", "\n", "# extend the labeling rules with labeling functions\n", @@ -865,7 +855,7 @@ "not_informative_rules = [\n", " Rule(\"rich\", 
\"SPAM\"),\n", " Rule(\"film\", \"HAM\"),\n", - " Rule(\"meeting\", \"HAM\")\n", + " Rule(\"meeting\", \"HAM\"),\n", "]" ] }, @@ -877,6 +867,7 @@ "outputs": [], "source": [ "from argilla.labeling.text_classification import delete_rules\n", + "\n", "delete_rules(dataset=\"weak_supervision_yt\", rules=not_informative_rules)" ] }, @@ -1185,7 +1176,7 @@ "from argilla.labeling.text_classification import MajorityVoter\n", "\n", "# instantiate the majority vote label model by simply providing the weak labels object\n", - "majority_model = MajorityVoter(weak_labels)\n" + "majority_model = MajorityVoter(weak_labels)" ] }, { @@ -1222,7 +1213,7 @@ ], "source": [ "# check its performance\n", - "print(majority_model.score(output_str=True))\n" + "print(majority_model.score(output_str=True))" ] }, { @@ -1252,7 +1243,7 @@ "\n", "# accuracy without abstentions: 0.96; accuracy of random classifier: 0.5\n", "print(\"accuracy_c:\", frac_non * 0.96 + frac_abs * 0.5)\n", - "# accuracy_c: 0.868\n" + "# accuracy_c: 0.868" ] }, { @@ -1295,7 +1286,7 @@ "# extract training data\n", "training_data = pd.DataFrame(\n", " [{\"text\": rec.text, \"label\": rec.prediction[0][0]} for rec in records_for_training]\n", - ")\n" + ")" ] }, { @@ -1414,7 +1405,7 @@ ], "source": [ "# preview training data\n", - "training_data\n" + "training_data" ] }, { @@ -1452,7 +1443,7 @@ "snorkel_model = Snorkel(weak_labels)\n", "\n", "# we fit the model\n", - "snorkel_model.fit(lr=0.001, n_epochs=50)\n" + "snorkel_model.fit(lr=0.001, n_epochs=50)" ] }, { @@ -1503,7 +1494,7 @@ ], "source": [ "# we check its performance\n", - "print(snorkel_model.score(output_str=True))\n" + "print(snorkel_model.score(output_str=True))" ] }, { @@ -1529,7 +1520,7 @@ "\n", "# accuracy without abstentions: 0.95; accuracy of random classifier: 0.5\n", "print(\"accuracy_c:\", frac_non * 0.95 + frac_abs * 0.5)\n", - "# accuracy_c: 0.8761999999999999\n" + "# accuracy_c: 0.8761999999999999" ] }, { @@ -1560,7 +1551,7 @@ "# extract training 
data\n", "training_data = pd.DataFrame(\n", " [{\"text\": rec.text, \"label\": rec.prediction[0][0]} for rec in records_for_training]\n", - ")\n" + ")" ] }, { @@ -1775,7 +1766,7 @@ ], "source": [ "# we check its performance\n", - "print(flyingsquid_model.score(output_str=True))\n" + "print(flyingsquid_model.score(output_str=True))" ] }, { @@ -2101,7 +2092,7 @@ " X_test=X_test,\n", " Y_test=weak_labels.annotation(),\n", " batch_size=8,\n", - ")\n" + ")" ] }, { @@ -2137,7 +2128,7 @@ "trainer.fit(\n", " model=weasel,\n", " datamodule=datamodule,\n", - ")\n" + ")" ] }, { @@ -2158,7 +2149,7 @@ "outputs": [], "source": [ "trainer.test()\n", - "# {'accuracy': 0.94, ...}\n" + "# {'accuracy': 0.94, ...}" ] }, { @@ -2184,7 +2175,7 @@ "predicted_probs, predicted_label = weasel.predict(tokenizer(text, return_tensors=\"pt\"))\n", "\n", "# Map predicted int to label\n", - "weak_labels.int2label[int(predicted_label)] # HAM\n" + "weak_labels.int2label[int(predicted_label)] # HAM" ] }, { @@ -2214,7 +2205,7 @@ ")\n", "\n", "# use pipeline for predictions\n", - "classifier(text) # [{'label': 'HAM', 'score': 0.6110987663269043}]\n" + "classifier(text) # [{'label': 'HAM', 'score': 0.6110987663269043}]" ] } ], diff --git a/docs/_source/tutorials/notebooks/deploying-text2text-dvc-explainability.ipynb b/docs/_source/tutorials/notebooks/deploying-text2text-dvc-explainability.ipynb index b2ec1c9759..afb95fb698 100644 --- a/docs/_source/tutorials/notebooks/deploying-text2text-dvc-explainability.ipynb +++ b/docs/_source/tutorials/notebooks/deploying-text2text-dvc-explainability.ipynb @@ -128,10 +128,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -154,7 +151,7 @@ "# # Replace api_url with the url to your HF Spaces 
URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -198,9 +195,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -383,26 +383,26 @@ } ], "source": [ - "\n", - "def dataset_backupper(datasets: List[str], duration: int=60*60*24): \n", + "def dataset_backupper(datasets: List[str], duration: int = 60 * 60 * 24):\n", " while True:\n", " # load datasets and save as .pkl files\n", - " for dataset_name in datasets: \n", + " for dataset_name in datasets:\n", " ds = rg.load(dataset_name)\n", " df = ds.to_pandas()\n", " df.to_pickle(f\"data/{dataset_name}.pkl\")\n", "\n", " # get all .pkl files using glob\n", - " files = glob.glob('data/*.pkl', recursive=True)\n", - " [os.system(f'dvc add {file}') for file in files]\n", - " \n", + " files = glob.glob(\"data/*.pkl\", recursive=True)\n", + " [os.system(f\"dvc add {file}\") for file in files]\n", + "\n", " # push all .pkl.dvc files to github via git push\n", " os.system(\"dvc push\")\n", " os.system(\"git commit -m 'update DVC files'\")\n", " os.system(\"git push\")\n", - " \n", + "\n", " time.sleep(duration)\n", - " \n", + "\n", + "\n", "dataset_backupper([\"argilla-dvc\"])" ] }, diff --git a/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb 
b/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb index cfa6236721..133d6e5545 100644 --- a/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb +++ b/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb @@ -67,12 +67,14 @@ "!nvidia-smi\n", "# info on available ram\n", "from psutil import virtual_memory\n", + "\n", "ram_gb = virtual_memory().total / 1e9\n", - "print('\\n\\nYour runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))" + "print(\"\\n\\nYour runtime has {:.1f} gigabytes of available RAM\\n\".format(ram_gb))" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -83,14 +85,18 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -147,6 +153,7 @@ "outputs": [], "source": [ "import time\n", + "\n", "time.sleep(30) # sleeping to give ES time to set up. Otherwise downstream code will bug" ] }, @@ -236,10 +243,12 @@ "from pyngrok import ngrok, conf\n", "\n", "print(\"Enter your authtoken, which can be copied from https://dashboard.ngrok.com/auth\")\n", - "print(\"You need to create a free ngrok account to get an authtoken. The token looks something like this: ASDO1283YZaDu95vysXYIUXZXYRR_54YfASDIb8cpNfVoz349587\")\n", + "print(\n", + " \"You need to create a free ngrok account to get an authtoken. 
The token looks something like this: ASDO1283YZaDu95vysXYIUXZXYRR_54YfASDIb8cpNfVoz349587\"\n", + ")\n", "conf.get_default().auth_token = getpass.getpass()\n", - "# if the above does not work, you can try: \n", - "#ngrok.set_auth_token(\"\")" + "# if the above does not work, you can try:\n", + "# ngrok.set_auth_token(\"\")" ] }, { @@ -255,12 +264,18 @@ "[ngrok.disconnect(tunnel.public_url) for tunnel in ngrok.get_tunnels()]\n", "\n", "# create the public link\n", - "# ! check whether this is actually the localhost port Argilla is running on via the terminal above \n", - "ngrok_tunnel = ngrok.connect(6900) # insert the port number Argilla is running on. e.g. 6900 if the terminal displays something like \"Uvicorn running on http://0.0.0.0:6900\"\n", - "print(\"You can now access the Argilla localhost with the public link below. (It should look something like 'http://X03b-34-XXX-237-25.ngrok.io')\\n\")\n", + "# ! check whether this is actually the localhost port Argilla is running on via the terminal above\n", + "ngrok_tunnel = ngrok.connect(\n", + " 6900\n", + ") # insert the port number Argilla is running on. e.g. 6900 if the terminal displays something like \"Uvicorn running on http://0.0.0.0:6900\"\n", + "print(\n", + " \"You can now access the Argilla localhost with the public link below. 
(It should look something like 'http://X03b-34-XXX-237-25.ngrok.io')\\n\"\n", + ")\n", "print(f\"Your ngrok public link: {ngrok_tunnel}\\n\")\n", "print(\"After clicking on the link, there will be a warning, which you can ignore\")\n", - "print(\"You can then login with the default argilla username 'argilla' and password '1234'\")" + "print(\n", + " \"You can then login with the default argilla username 'argilla' and password '1234'\"\n", + ")" ] }, { @@ -294,10 +309,11 @@ "source": [ "# load dataset\n", "import datasets\n", + "\n", "dataset_name = \"trec\"\n", "dataset_hf = datasets.load_dataset(dataset_name, version=datasets.Version(\"2.0.0\"))\n", "# we work with only a sixth of the texts of the dataset for faster testing\n", - "dataset_hf[\"train\"] = dataset_hf[\"train\"].shard(num_shards=6, index=0)\n" + "dataset_hf[\"train\"] = dataset_hf[\"train\"].shard(num_shards=6, index=0)" ] }, { @@ -316,13 +332,17 @@ "# Choose transformer model: In non-gpu environments we use a tiny model to increase efficiency\n", "if not torch.cuda.is_available():\n", " transformer_model = \"prajjwal1/bert-tiny\"\n", - " print(f\"No GPU is available, we therefore use the small model '{transformer_model}' for the active learning loop.\\n\")\n", + " print(\n", + " f\"No GPU is available, we therefore use the small model '{transformer_model}' for the active learning loop.\\n\"\n", + " )\n", "else:\n", - " transformer_model = \"microsoft/deberta-v3-xsmall\" #\"bert-base-uncased\"\n", - " print(f\"A GPU is available, we can therefore use '{transformer_model}' for the active learning loop.\\n\")\n", + " transformer_model = \"microsoft/deberta-v3-xsmall\" # \"bert-base-uncased\"\n", + " print(\n", + " f\"A GPU is available, we can therefore use '{transformer_model}' for the active learning loop.\\n\"\n", + " )\n", "\n", "# Init tokenizer\n", - "tokenizer = AutoTokenizer.from_pretrained(transformer_model)\n" + "tokenizer = AutoTokenizer.from_pretrained(transformer_model)" ] }, { @@ -355,8 
+375,7 @@ "\n", "dataset_test = TransformersDataset.from_arrays(\n", " test_text, test_labels, tokenizer, target_labels=np.arange(num_classes)\n", - ")\n", - "\n" + ")" ] }, { @@ -384,8 +403,13 @@ "clf_factory = TransformerBasedClassificationFactory(\n", " TransformerModelArguments(transformer_model),\n", " num_classes=num_classes,\n", - " kwargs={\"device\": device, \"num_epochs\": num_epochs, \"lr\": 2e-05, \"mini_batch_size\": 8,\n", - " \"early_stopping_no_improvement\": 5}\n", + " kwargs={\n", + " \"device\": device,\n", + " \"num_epochs\": num_epochs,\n", + " \"lr\": 2e-05,\n", + " \"mini_batch_size\": 8,\n", + " \"early_stopping_no_improvement\": 5,\n", + " },\n", ")\n", "\n", "\n", @@ -393,7 +417,7 @@ "query_strategy = BreakingTies()\n", "\n", "# Use the active learner with a pool containing all unlabeled data\n", - "active_learner = PoolBasedActiveLearner(clf_factory, query_strategy, dataset_st)\n" + "active_learner = PoolBasedActiveLearner(clf_factory, query_strategy, dataset_st)" ] }, { @@ -407,7 +431,11 @@ "source": [ "## draw an initial sample for the first annotation round\n", "# https://small-text.readthedocs.io/en/v1.1.1/components/initialization.html\n", - "from small_text import random_initialization, random_initialization_stratified, random_initialization_balanced\n", + "from small_text import (\n", + " random_initialization,\n", + " random_initialization_stratified,\n", + " random_initialization_balanced,\n", + ")\n", "import numpy as np\n", "\n", "# Fix seed for reproducibility\n", @@ -417,9 +445,9 @@ "NUM_SAMPLES = 10\n", "\n", "# Draw an initial subset from the data pool\n", - "#initial_indices = random_initialization(dataset_st, NUM_SAMPLES)\n", - "#initial_indices = random_initialization_balanced(train_labels, NUM_SAMPLES)\n", - "initial_indices = random_initialization_stratified(train_labels, NUM_SAMPLES)\n" + "# initial_indices = random_initialization(dataset_st, NUM_SAMPLES)\n", + "# initial_indices = 
random_initialization_balanced(train_labels, NUM_SAMPLES)\n", + "initial_indices = random_initialization_stratified(train_labels, NUM_SAMPLES)" ] }, { @@ -455,7 +483,7 @@ "]\n", "\n", "# Log initial records to Argilla\n", - "rg.log(records, DATASET_NAME)\n" + "rg.log(records, DATASET_NAME)" ] }, { @@ -475,6 +503,7 @@ "LABEL2INT = dataset_hf[\"train\"].features[\"coarse_label\"].str2int\n", "ACCURACIES = []\n", "\n", + "\n", "# Set up the active learning loop with the listener decorator\n", "@listener(\n", " dataset=DATASET_NAME,\n", @@ -527,8 +556,7 @@ " print(\"Waiting for annotations ...\")\n", "\n", "\n", - "\n", - "active_learning_loop.start()\n" + "active_learning_loop.start()" ] }, { @@ -564,7 +592,7 @@ ], "source": [ "print(f\"You can now start annotating with active learning in the background!\")\n", - "print(f\"The public link for accessing the annotation interface is: {ngrok_tunnel}\")\n" + "print(f\"The public link for accessing the annotation interface is: {ngrok_tunnel}\")" ] }, { @@ -603,7 +631,8 @@ "source": [ "# plot learning progress over different active learning iterations\n", "import pandas as pd\n", - "pd.Series(ACCURACIES).plot(xlabel=\"Iteration\", ylabel=\"Accuracy\")\n" + "\n", + "pd.Series(ACCURACIES).plot(xlabel=\"Iteration\", ylabel=\"Accuracy\")" ] }, { diff --git a/docs/_source/tutorials/notebooks/deploying-texttokenclassification-fastapi.ipynb b/docs/_source/tutorials/notebooks/deploying-texttokenclassification-fastapi.ipynb index 7d1d573230..c03057a4db 100644 --- a/docs/_source/tutorials/notebooks/deploying-texttokenclassification-fastapi.ipynb +++ b/docs/_source/tutorials/notebooks/deploying-texttokenclassification-fastapi.ipynb @@ -129,10 +129,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", 
api_key=\"admin.apikey\")" ] }, { @@ -155,7 +152,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -186,7 +183,7 @@ "\n", "# for adding logging to API endpoints\n", "from argilla.monitoring.asgi import (\n", - " ArgillaLogHTTPMiddleware, \n", + " ArgillaLogHTTPMiddleware,\n", " text_classification_mapper,\n", " token_classification_mapper,\n", ")\n", @@ -212,9 +209,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -435,11 +435,12 @@ " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/transformers/\", # the endpoint that will be logged\n", " dataset=\"monitoring_transformers\", # your dataset name\n", - " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", + " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", ")" ] }, @@ -467,6 +468,7 @@ " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/spacy/\",\n", @@ -474,6 +476,7 @@ " records_mapper=token2records,\n", ")\n", "\n", + "\n", "# prediction endpoint using spacy pipeline\n", "@app.post(\"/ner/\")\n", "def predict_spacy(batch: List[str]):\n", @@ -586,8 +589,8 @@ "import requests\n", "\n", "response = requests.post(\n", - " \"http://localhost:8000/sentiment/\", \n", - " json=[\"I like Argilla\", \"I hated data labelling but now I don't\"]\n", + " \"http://localhost:8000/sentiment/\",\n", + " json=[\"I like Argilla\", \"I hated data labelling but now I don't\"],\n", ")\n", "\n", "response.content" @@ -634,7 +637,7 @@ "from transformers import pipeline\n", "\n", "from argilla.monitoring.asgi import (\n", - " ArgillaLogHTTPMiddleware, \n", + " ArgillaLogHTTPMiddleware,\n", " text_classification_mapper,\n", " token_classification_mapper,\n", ")\n", @@ -644,6 +647,7 @@ "\n", "app = FastAPI()\n", "\n", + "\n", "# prediction endpoint using transformers pipeline\n", "@app.post(\"/sentiment/\")\n", "def predict_transformers(batch: List[str]):\n", @@ -656,25 +660,29 @@ " for prediction in predictions\n", " ]\n", "\n", + "\n", "def text2records(batch: List[str], outputs: List[dict]):\n", " return [\n", " text_classification_mapper(data, prediction)\n", " for data, prediction in 
zip(batch, outputs)\n", " ]\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/transformers/\", # the endpoint that will be logged\n", " dataset=\"monitoring_transformers\", # your dataset name\n", - " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", + " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", ")\n", "\n", + "\n", "def token2records(batch: List[str], outputs: List[dict]):\n", " return [\n", " token_classification_mapper(data, prediction)\n", " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "# prediction endpoint using spacy pipeline\n", "@app.post(\"/ner/\")\n", "def predict_spacy(batch: List[str]):\n", @@ -694,6 +702,7 @@ " predictions.append(prediction)\n", " return predictions\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/ner/\",\n", @@ -708,14 +717,13 @@ " records_mapper=text2records,\n", ")\n", "\n", + "\n", "@app.get(\"/\")\n", "def root():\n", " return {\"message\": \"alive\"}\n", "\n", - "argilla.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "\n", + "argilla.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] } ], diff --git a/docs/_source/tutorials/notebooks/labelling-spacy-llm.ipynb b/docs/_source/tutorials/notebooks/labelling-spacy-llm.ipynb index a1ece5425a..c6682dce90 100644 --- a/docs/_source/tutorials/notebooks/labelling-spacy-llm.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-spacy-llm.ipynb @@ -108,10 +108,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\",\n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] 
}, { @@ -134,7 +131,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -157,9 +154,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -247,8 +247,8 @@ "config = configparser.ConfigParser()\n", "config.read_string(config_string)\n", "\n", - "with open(\"config.cfg\", 'w') as configfile:\n", - " config.write(configfile)" + "with open(\"config.cfg\", \"w\") as configfile:\n", + " config.write(configfile)" ] }, { @@ -299,20 +299,21 @@ "metadata": {}, "outputs": [], "source": [ - "#returns the category with the highest score\n", + "# returns the category with the highest score\n", "def get_textcat_suggestion(doc):\n", - " model_prediction = doc.cats\n", - " return max(model_prediction, key=model_prediction.get)\n", + " model_prediction = doc.cats\n", + " return max(model_prediction, key=model_prediction.get)\n", + "\n", "\n", - "#selects the top N sentences with the highest scores and return combined string\n", + "# selects the top N sentences with the highest scores and return combined string\n", "def get_summarization_suggestion(doc):\n", - " sentence_scores = Counter()\n", - " for sentence in doc.sents:\n", - " for word in sentence:\n", - " sentence_scores[sentence] += 1\n", - " summary_sentences = nlargest(2, sentence_scores, 
key=sentence_scores.get)\n", - " summary = ' '.join(str(sentence) for sentence in summary_sentences)\n", - " return summary" + " sentence_scores = Counter()\n", + " for sentence in doc.sents:\n", + " for word in sentence:\n", + " sentence_scores[sentence] += 1\n", + " summary_sentences = nlargest(2, sentence_scores, key=sentence_scores.get)\n", + " summary = \" \".join(str(sentence) for sentence in summary_sentences)\n", + " return summary" ] }, { @@ -354,21 +355,16 @@ "outputs": [], "source": [ "dataset = rg.FeedbackDataset(\n", - " fields=[\n", - " rg.TextField(name=\"text\")\n", - " ],\n", + " fields=[rg.TextField(name=\"text\")],\n", " questions=[\n", " rg.LabelQuestion(\n", " name=\"label-question\",\n", " title=\"Classify the text category.\",\n", - " #make sure that the labels are in line with the labels we have defined in config.cfg\n", - " labels=[\"HISTORY\",\"MUSIC\",\"TECHNOLOGY\",\"SCIENCE\",\"SPORTS\",\"POLITICS\"]\n", + " # make sure that the labels are in line with the labels we have defined in config.cfg\n", + " labels=[\"HISTORY\", \"MUSIC\", \"TECHNOLOGY\", \"SCIENCE\", \"SPORTS\", \"POLITICS\"],\n", " ),\n", - " rg.TextQuestion(\n", - " name=\"text-question\",\n", - " title=\"Provide a summary for the text.\"\n", - " )\n", - " ]\n", + " rg.TextQuestion(name=\"text-question\", title=\"Provide a summary for the text.\"),\n", + " ],\n", ")" ] }, @@ -387,16 +383,16 @@ "source": [ "records = [\n", " rg.FeedbackRecord(\n", - " fields={\n", - " \"text\": doc.text\n", - " },\n", + " fields={\"text\": doc.text},\n", " suggestions=[\n", - " {\"question_name\": \"label-question\",\n", - " \"value\": get_textcat_suggestion(doc)},\n", - " {\"question_name\":\"text-question\",\n", - " \"value\": get_summarization_suggestion(doc)}\n", - " ]\n", - " ) for doc in [nlp(item) for item in dataset_hf[\"context\"]]\n", + " {\"question_name\": \"label-question\", \"value\": get_textcat_suggestion(doc)},\n", + " {\n", + " \"question_name\": \"text-question\",\n", + " 
\"value\": get_summarization_suggestion(doc),\n", + " },\n", + " ],\n", + " )\n", + " for doc in [nlp(item) for item in dataset_hf[\"context\"]]\n", "]" ] }, diff --git a/docs/_source/tutorials/notebooks/labelling-text2text-disaggregators-explainability.ipynb b/docs/_source/tutorials/notebooks/labelling-text2text-disaggregators-explainability.ipynb index 342b0e43c3..f66a5d254c 100644 --- a/docs/_source/tutorials/notebooks/labelling-text2text-disaggregators-explainability.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-text2text-disaggregators-explainability.ipynb @@ -120,10 +120,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -146,7 +143,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -190,9 +187,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -216,7 +216,9 @@ "dataset_rg = rg.read_datasets(my_dataset[\"train\"], task=\"Text2Text\")\n", "\n", "# log subset into argilla\n", - "rg.log(dataset_rg[:1000], \"news-summary\", chunk_size=50) # set smaller chunk size to overcome io-issues" + "rg.log(\n", + " dataset_rg[:1000], \"news-summary\", chunk_size=50\n", + ") # set smaller chunk size to overcome io-issues" ] }, { @@ -375,7 +377,7 @@ "metadata_ds = df[df.columns[1:]].to_dict(orient=\"records\")\n", "for metadata_rec, rec in zip(metadata_ds, ds):\n", " rec.metadata = metadata_rec\n", - "rg.log(ds, \"news-summary\", chunk_size=50) # upsert records" + "rg.log(ds, \"news-summary\", chunk_size=50) # upsert records" ] }, { diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-gpt3-fewshot.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-gpt3-fewshot.ipynb index 379c43915b..253bd5348c 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-gpt3-fewshot.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-gpt3-fewshot.ipynb @@ -115,14 +115,12 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -131,6 +129,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -141,7 +140,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# 
rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -176,6 +175,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -186,14 +186,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -445,17 +449,15 @@ "outputs": [], "source": [ "# set your api key as ENV, for example with Python: os.environ[\"OPENAI_API_KEY\"]\n", - "openai.api_key = os.getenv(\"OPENAI_API_KEY\") \n", + "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n", + "\n", "\n", "def classify(text):\n", " # build prompt with template and input\n", " prompt = f\"{PROMPT_TEMPLATE}\\n{text}\\n\"\n", " # use create completion template\n", " completion = openai.Completion.create(\n", - " model=\"text-davinci-003\",\n", - " prompt=prompt,\n", - " temperature=0,\n", - " max_tokens=256\n", + " model=\"text-davinci-003\", prompt=prompt, temperature=0, max_tokens=256\n", " )\n", " # get first choice text\n", " json_response = completion[\"choices\"][0][\"text\"].strip()\n", @@ -463,8 +465,11 @@ " prediction = loads(json_response)\n", " except:\n", " # for some examples, json is not correctly formatted\n", - " return {\"prediction\": None, \"explanation\": f\"Wrong JSON format: {json_response}\" }\n", - " return prediction " + " 
return {\n", + " \"prediction\": None,\n", + " \"explanation\": f\"Wrong JSON format: {json_response}\",\n", + " }\n", + " return prediction" ] }, { @@ -682,7 +687,7 @@ "# let's predict over the test set to eval our zero-shot classifier\n", "test_ds_with_preds = banking_ds[\"test\"].map(lambda example: classify(example[\"text\"]))\n", "\n", - "pd.set_option('display.max_colwidth', None)\n", + "pd.set_option(\"display.max_colwidth\", None)\n", "test_ds_with_preds.to_pandas().head(15)" ] }, @@ -716,7 +721,7 @@ " record = rg.TextClassificationRecord(\n", " inputs={\"text\": example[\"text\"], \"explanation\": example[\"explanation\"]},\n", " annotation=labels[example[\"label\"]],\n", - " prediction=[(example[\"prediction\"].lower(), 1.0)]\n", + " prediction=[(example[\"prediction\"].lower(), 1.0)],\n", " )\n", " records.append(record)\n", "\n", @@ -1745,7 +1750,7 @@ ], "range": [ 0, - 0.9090909090909091 + 0.9090909090909092 ], "type": "linear" } @@ -5173,7 +5178,7 @@ ], "range": [ 0, - 0.9090909090909091 + 0.9090909090909092 ], "type": "linear" } diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-sentence-transformers-semantic.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-sentence-transformers-semantic.ipynb index 6281388bf5..3e2c1b7058 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-sentence-transformers-semantic.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-sentence-transformers-semantic.ipynb @@ -116,10 +116,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -142,7 +139,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API 
key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -162,7 +159,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "from sentence_transformers import SentenceTransformer\n", "from datasets import load_dataset" ] @@ -184,9 +180,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -213,9 +212,9 @@ "\n", "# Encode text field using batched computation\n", "dataset = dataset.map(\n", - " lambda batch: {\"vectors\": encoder.encode(batch[\"text\"])}, \n", - " batch_size=32, \n", - " batched=True\n", + " lambda batch: {\"vectors\": encoder.encode(batch[\"text\"])},\n", + " batch_size=32,\n", + " batched=True,\n", ")\n", "\n", "# Removes the original labels because you'll be labeling from scratch\n", @@ -342,7 +341,16 @@ "\n", "# Our labeling scheme\n", "settings = rg.TextClassificationSettings(\n", - " label_schema=[\"change_details\", \"card\", \"atm\", \"top_up\", \"balance\", \"transfer\", \"exchange_rate\", \"pin\"]\n", + " label_schema=[\n", + " \"change_details\",\n", + " \"card\",\n", + " \"atm\",\n", + " \"top_up\",\n", + " \"balance\",\n", + " \"transfer\",\n", + " \"exchange_rate\",\n", + " \"pin\",\n", + " ]\n", ")\n", "\n", "rg.configure_dataset_settings(name=\"banking77-topics\", settings=settings)\n", diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb 
b/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb index 144af598c6..05c2a28437 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb @@ -1,10378 +1,10395 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "5PwXNf1WNYm4" - }, - "source": [ - "# 📸 Bulk Labeling Multimodal Data\n", - "\n", - "\n", - "In this tutorial, we will work with multimodal data of images and text. It will walk you through the following steps:\n", - "\n", - "- Load a dataset with images and text of electronic products.\n", - "- Experiment with zero-shot image and text classification.\n", - "- Label the data using bulk labelling with image and text embeddings.\n", - "- Train a SetFit classification model on the labelled data.\n", - "\n", - "\"Argilla\n" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "5PwXNf1WNYm4" + }, + "source": [ + "# 📸 Bulk Labeling Multimodal Data\n", + "\n", + "\n", + "In this tutorial, we will work with multimodal data of images and text. It will walk you through the following steps:\n", + "\n", + "- Load a dataset with images and text of electronic products.\n", + "- Experiment with zero-shot image and text classification.\n", + "- Label the data using bulk labelling with image and text embeddings.\n", + "- Train a SetFit classification model on the labelled data.\n", + "\n", + "\"Argilla\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running Argilla\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. There are two main options for deploying and running Argilla:\n", + "\n", + "1. 
[Deploy Argilla on Hugging Face Spaces](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla): This is the fastest option and the recommended choice for connecting to external notebooks (e.g., Google Colab) if you have an account on Hugging Face.\n", + "\n", + "2. [Launch Argilla using Argilla's quickstart Docker image](../../getting_started/quickstart.ipynb): This is the recommended option if you want Argilla running on your local machine. Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", + "\n", + "For more information on deployment options, please check the Deployment section of the documentation.\n", + "\n", + "
\n", + "\n", + "Tip\n", + " \n", + "This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", + "
\n", + "\n", + "## Setup\n", + "\n", + "For this tutorial, you'll need to install the Argilla client and a few third-party libraries using `pip`:" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "hCR_hYiWNYm7" + }, + "source": [ + "## Introduction\n", + "\n", + "__Real-world multimodal data__ is often a mix of text and images. In this tutorial, we will work with a dataset of electronic products. The dataset contains images of the products and a description of the product. \n", + "\n", + "This notebook uses a dataset of electronics parts and products from a fictional electronics webshop. \n", + "\n", + "Let's get started!" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "B-DDXAVxNYm7" + }, + "outputs": [], + "source": [ + "%pip install argilla \"setfit~=0.2.0\" \"datasets~=2.3.0\" transformers sentence-transformers -qqq" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "le5C4pzTNYm9" + }, + "source": [ + "Let's import the Argilla module for reading and writing data:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "KNr0WEVWNYm-" + }, + "outputs": [], + "source": [ + "import argilla as rg" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "J5ecm_MjNYm-" + }, + "source": [ + "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fEDgKhSVNYm-" + }, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# Replace api_key if you configured a custom API key\n", + "rg.init(api_url=\"https://localhost:6900\", api_key=\"admin.apikey\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you 
will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# import os\n", + "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"admin.apikey\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "VgNib6EDNYm-" + }, + "source": [ + "Finally, let's include the imports we need:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pprint as pp\n", + "from requests import get\n", + "\n", + "from datasets import load_dataset\n", + "from PIL import Image\n", + "from sklearn.metrics import accuracy_score\n", + "from sentence_transformers import SentenceTransformer\n", + "from transformers import pipeline\n", + "from sentence_transformers.losses import CosineSimilarityLoss\n", + "from setfit import SetFitModel, SetFitTrainer\n", + "from PIL import Image" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "izZIHEzYNYnB" + }, + "source": [ + "## A 'real-world' multimodal dataset\n", + "\n", + "The dataset samples contain a `page_name`, `page_descriptions`, and a `label`. The dataset is split into two parts: `labelled` and `unlabelled`. The labelled portion is the result of my annotation so we can test methods. In reality, let's say this doesn't exist 😏. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 438, + "referenced_widgets": [ + "d2cf4d1133a2421aa1ec980bdca4fecd", + "bcfc02b30e024b5c81856bcd9e191d07", + "eaefc3e261a7453cb66905079b71f16a", + "ac546920996e4ca081e438999ae19fc8", + "c54e39be37434e0d826d86041ef1de96", + "f4d6914361f44912b0d7ca1b16496606", + "b3c787a117ae42738c315c5cdabd508c", + "edf41d1718984e7dbef2b65799935fd8", + "20dad493a55343efaabbc6f7c1966cf0", + "b91ae0e5a43143e68f53099e85f1afc2", + "a1ec5941f87f4b23bc72188313ac9275", + "6482c21eb5d449f795a9e39fcae6a46e", + "2db4287290eb4c25bc440fc3f2f1d258", + "b470309f2a6b4878852ae9e589764edf", + "63de802f18e44b16a3a2f5029ad7b705", + "670f91d2101c4d87997c228210646f89", + "d8f11947b0d048489bdd649479c50b1f", + "70eee61c4d3842f69569dee3ac8773fa", + "26175752e18140b4a944bfc55f77a069", + "c13ea0c47f6e48f0a804fa9f2f5ce544", + "9e17b92da1aa43d0aa980db275437fde", + "d2186c7599164e528aa3d92f6999a9bb", + "35f1224563044be39e254342c6c5b635", + "87a472fb7193452faea139ab6bf1c9be", + "eb9b58dab8044074b1cac3e39f26e561", + "6359430089654ca4bdd587c125b317a0", + 
"be06bfcbe41040369d025626b8227b6b", + "082dfef575c94327a5377c3f7278cf5a", + "84d1a26c298b42cd820bcd2e4302a443", + "0987a64b9cea4850998e78d76d31c3b8", + "fa0573f2ebd84a5ba5ebdc0452362887", + "e8e295d7292245fb9f17413599d59960", + "0286aff9c56b43e7abd15f17dd33d6db", + "8b161e12449f4078bb64245518bb596d", + "71181e8f913b412fb72917c55a151723", + "dcd3ba169df34a0395eb5d03405f8835", + "d0b05902a4db4abd8c10cab6178a59ec", + "8294163893bc40c2bef26032babb3938", + "735a35f35dee4d548cd6284f9e3e1f5a", + "b7dfa3f0d2374482af6dc25ec8dd65df", + "c65f5b1c90e24d47a060c0b362749b82", + "01bc8315e7cd4064a8653eae44cea192", + "36156c14c8d544deab0ea65ac5ba7508", + "15d85f4fe15545bc85d3717d7af7bbb1", + "f8d66c452a6d4347b9a8247ad135fd35", + "3c8aa1d02f1541bdb59b0a8dff27148f", + "88677a526eae40a1b5e0b5e9e7a25218", + "eaf5ad9e9e3240799128ef8bbf0a195a", + "48f63d3af7604e8abe4ca94dbae63f48", + "27d3f1981c244549901253b3266de1ff", + "9aae9636296a4245989388403103e0eb", + "139f9e8c84f447b985c2080c17d14a8c", + "1d37c4be099b44b5bc67c2cd976693b9", + "1ad0c7637293497a9a75bfa45c1a06e8", + "4fc202bfa55842b48e59d4a800026331", + "bbb9442a3ec1463790bcf6533f3be905", + "16bd920f956f43b3921b79cec4d76d87", + "c12bc29016124339a6c3cb11f21bfaf0", + "a6b61668ad1146e78c2a9b2556b23703", + "14cc3d3e93c54226bc2f38a4745c1d23", + "16fccf90f2974a938167677b189b67a2", + "714590bcc27e4efc837f870812929f3e", + "9cef8cafa31a47fd9f7410478b5415e8", + "2e4deb55d7b74e2fa09c9f925fd2e751", + "2433cd897d734c78b3d4a62602865415", + "9fac34a2d3e44c95a2591b26ddae5c1e", + "88edaefb70c34854ad6f16d5a82b40e7", + "ba93d6edfed5434397d869ba2bd7aca5", + "7282d9ef9a754bcfb1297081296aed5a", + "946c67b0a3f54ce09c35db7bef44991c", + "f41cf420defb4b109f836fecea8e1d15", + "c10b310f7f6344b39c8d165c887d28f1", + "052b62b792fc4f1183ffb91f18d655c7", + "c4a54f5262014b77964d42429d5e43bf", + "cf540757bc79428bab170e157eb9381f", + "03390d37eba24242ae6c2099d5fe7863", + "88901ee5ca86458288bf82a22db7e379", + "498becb6513a44a4a17cff76a8ae4666", + 
"5066a8fd18d94f9da0aa0cf354d13452", + "a8d41a62f1b94575a744c12a399de8c8", + "e8cc8dbe0d164a9eaec472ffe1351db9", + "0d3cbcf01ca14063be3ae129db78fdcb", + "1a951db095d441e295bb9ff00d4dba30", + "e120b072886b4d4f94ef506ca3d6a605", + "d5fea5e89ebd4d298c169526eadeeb32", + "8513e53c07c04d75999be1fec6d5b491", + "e7da6c65cec140c3801fca68bcfdebcc", + "eccef3d9147242d9a1e42f4abd0ebe4c", + "b77605d5afb94b8cb579f3c690ea4203", + "aedab95f1bb144c38180f2b2e71c14ff", + "49d11634ca864c04bc87e472d3563e73", + "20163aeba069412eabd3b506a339667f", + "feef17ea90ae4105b2ffb56841ed2adb", + "e97c5c6e969842468f53520dfba1ee8f", + "89b04536243a48049eb777f00fe49555", + "94502dbbc1ab42f48d2d375a8265dbb4", + "090c3e60ff0d4e5fa930767672802d86", + "b577441dde9e43cb9644244ca0cf336e", + "b850f11900ae4874963b35e0b86470e5", + "963fbb3d5b9f4848a8cadf2a2d2b264b", + "52ad436eea8649058457d6380f618ea2", + "5817239aa98046e9be3736688f8c1a5f", + "c48e02dc87d74d50ba6de4aefeb1441e", + "ecc225aab6804d97b1cdd383621c9695", + "fcfda905fd4448b3940b04f4136c53a4", + "16162c715d4745f3aa91c7f38a78a32d", + "4c7287d821f1471e9ebdafa2f8229d28", + "5a65206e09c04176bfd33bcb31cd1ad5", + "9ac85729093c41c1b2a0fef5191b8ab5", + "d774f350171b4c3fb6be768dbf11c726" + ] }, + "id": "gdVdxJt-NYnC", + "outputId": "c54aa613-806e-42fa-b4de-f374c78b1d50" + }, + "outputs": [], + "source": [ + "ELECTRONICS_DATASET = \"burtenshaw/electronics\"\n", + "dataset = load_dataset(ELECTRONICS_DATASET)\n", + "labels = dataset[\"labelled\"].features[\"label\"].names\n", + "int2str = dataset[\"labelled\"].features[\"label\"].int2str" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running Argilla\n", - "\n", - "For this tutorial, you will need to have an Argilla server running. There are two main options for deploying and running Argilla:\n", - "\n", - "1. 
[Deploy Argilla on Hugging Face Spaces](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla): This is the fastest option and the recommended choice for connecting to external notebooks (e.g., Google Colab) if you have an account on Hugging Face.\n", - "\n", - "2. [Launch Argilla using Argilla's quickstart Docker image](../../getting_started/quickstart.ipynb): This is the recommended option if you want Argilla running on your local machine. Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", - "\n", - "For more information on deployment options, please check the Deployment section of the documentation.\n", - "\n", - "
\n", - "\n", - "Tip\n", - " \n", - "This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", - "
\n", - "\n", - "## Setup\n", - "\n", - "For this tutorial, you'll need to install the Argilla client and a few third-party libraries using `pip`:" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{'image_url': 'https://tse1.mm.bing.net/th?id=OIP.to3Cddhws6ECl-_ySZ5ShQHaFi&pid=Api',\n", + " 'label': 1,\n", + " 'page_description': '\\n'\n", + " '\\n'\n", + " 'Are you looking for a way to reduce the number of '\n", + " 'purchase orders you need to place for cable assemblies? '\n", + " \"If so, then this guide is for you! We'll show you how to \"\n", + " 'source cable assemblies with fewer purchase orders, '\n", + " \"saving you time and money. We'll cover topics such as \"\n", + " 'understanding the different types of cable assemblies, '\n", + " 'researching suppliers, and negotiating the best prices. '\n", + " \"We'll also provide tips on how to streamline the \"\n", + " 'ordering process and ensure you get the best quality '\n", + " \"products. With this guide, you'll be able to source \"\n", + " 'cable assemblies with fewer purchase orders and get the '\n", + " 'most out of your budget.',\n", + " 'page_name': 'How to Source Cable Assemblies With Fewer Purchase Orders ...'}\n" + ] + } + ], + "source": [ + "# show a sample\n", + "pp.pprint(next(iter(dataset[\"labelled\"])))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🔫 Zero-Shot Classification" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "U9R2eaRXNYnC" + }, + "source": [ + "### 📷 Images\n", + "\n", + "First, we will explore some zero-shot techniques. 
For the sake of comparison, we will use the `labelled` portion of the dataset.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "hCR_hYiWNYm7" - }, - "source": [ - "## Introduction\n", - "\n", - "__Real-world multimodal data__ is often a mix of text and images. In this tutorial, we will work with a dataset of electronic products. The dataset contains images of the products and a description of the product. \n", - "\n", - "This notebook uses a dataset of electronics parts and products from a fictional electronics webshop. \n", - "\n", - "Let's get started!" - ] + "id": "SshZkW1eNYnD", + "outputId": "a59a4610-0e25-434a-a6fb-d33394293e57" + }, + "outputs": [], + "source": [ + "# to save time, we'll take a slice of the dataset\n", + "test_dataset = load_dataset(ELECTRONICS_DATASET, split=\"test[:20%]\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "r-2LBdNVNYnD", + "outputId": "6c3cb81f-95de-4530-d501-f8b43f031c7d" + }, + "outputs": [], + "source": [ + "# More models in the model hub.\n", + "model_name = \"openai/clip-vit-large-patch14\"\n", + "classifier = pipeline(\"zero-shot-image-classification\", model=model_name)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "FodfnkJgNYnE" + }, + "source": [ + "First, we can zero-shot classify one image from the dataset: " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 509 + }, + "id": "5FMETGuDNYnE", + "outputId": "508eb028-acdb-428c-a7b1-8f34e2cbdb79" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "B-DDXAVxNYm7" - }, - "outputs": [], - "source": [ - "%pip install argilla \"setfit~=0.2.0\" 
\"datasets~=2.3.0\" transformers sentence-transformers -qqq" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{'label': 'switches', 'score': 0.9631496667861938}\n" + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "le5C4pzTNYm9" - }, - "source": [ - "Let's import the Argilla module for reading and writing data:" + "data": { + "image/png": "", + "text/plain": [ + "" ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "image_to_classify = next(iter(dataset[\"test\"]))[\"image_url\"]\n", + "scores = classifier(image_to_classify, candidate_labels=labels)\n", + "# show a sample\n", + "pp.pprint(scores[0])\n", + "Image.open(get(image_to_classify, stream=True).raw)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "vwkVzNhVNYnF" + }, + "source": [ + "Now we will test the accuracy and latency of a zero-shot image classifier on a sub-section of the `labelled` portion of the dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102, + "referenced_widgets": [ + "a8ec0defdfe841caaf0caa4de07fb84a", + "14b5a9186b3a47108fafa070c06ab35c", + "3916f72e314d46e883db7e529210b098", + "34453701eb2f40df9a2a0154c02990d4", + "9e98837e01004eba948e9c2d3ea23e5f", + "f7d2ce646a4c42ed893383a4f75af2d6", + "2a967808803e44fea67bd9958542eb43", + "540bda296c904aa48c10db351a11ca66", + "c1f28cff3e064e46a8c9bdd0d705a099", + "fadb08eafb084a4e91f8406055beb502", + "8aae0197e477451a890b1f93cdeebe10" + ] }, + "id": "j_cJkBbbNYnF", + "outputId": "a228a4b3-4ba5-4014-8a6d-7166a688553a" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "KNr0WEVWNYm-" - }, - "outputs": [], - "source": [ - "import argilla as rg" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "J5ecm_MjNYm-" - }, - "source": [ - "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 9min 20s, sys: 1.19 s, total: 9min 21s\n", + "Wall time: 2min 28s\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fEDgKhSVNYm-" - }, - "outputs": [], - "source": [ - "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", - "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"https://localhost:6900\",\n", - " api_key=\"admin.apikey\"\n", - ")" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\r" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "\n", + "def classify_image(sample):\n", + " label = classifier(sample[\"image_url\"], candidate_labels=labels)[0][\"label\"]\n", + " sample[\"clip_zero_shot\"] = 
labels.index(label)\n", + " return sample\n", + "\n", + "\n", + "test_dataset = test_dataset.map(classify_image)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8235294117647058\n" + ] + } + ], + "source": [ + "zero_shot_image_accuracy = accuracy_score(\n", + " test_dataset[\"label\"], test_dataset[\"clip_zero_shot\"]\n", + ")\n", + "print(f\"Accuracy: {zero_shot_image_accuracy}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "sgnz7XtXNYnG" + }, + "source": [ + "😞 Zero-shot image classification with a CLIP model gives an accuracy of **0.82** in just under **2 minutes** for only **20%** of the test data. This score is not impressive. Let's see if the text is more reliable." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "OEV0FJUmNYnG" + }, + "source": [ + "### 📚 Text\n", + "The product description and name also contain valuable information. Let's see what zero-shot classification of those can achieve. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "edJSqNnaNYnG" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading (…)lve/main/config.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.15k/1.15k [00:00<00:00, 711kB/s]\n", + "Downloading pytorch_model.bin: 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.63G/1.63G [00:06<00:00, 243MB/s]\n", + "Downloading (…)okenizer_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26.0/26.0 [00:00<00:00, 16.5kB/s]\n", + "Downloading 
(…)olve/main/vocab.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 899k/899k [00:02<00:00, 401kB/s]\n", + "Downloading (…)olve/main/merges.txt: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 456k/456k [00:00<00:00, 1.40MB/s]\n", 
+ "Downloading (…)/main/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.36M/1.36M [00:00<00:00, 2.81MB/s]\n" + ] + } + ], + "source": [ + "classifier = pipeline(model=\"facebook/bart-large-mnli\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102, + "referenced_widgets": [ + "0ad0c441b68a48c0a8fb9436050d2a1d", + "1183c04b5df54255bad175f72d4a9153", + "4378864bd5004edcba65b59fecbe84d3", + "6f63b713ac2b4358b939c2818d15e67f", + "89ef6f10348a408a8546452e1fa74528", + "7451e0901e6f40f5971359c9ac5d47bb", + "fe63dbf326194c6c813327bd643a85cd", + "5d9977f50b704bd5b4a371387cc71a17", + "c7d383a298da44e491c81887632a89ae", + "4c46328ead39408995ba5a9837d57e94", + "35a4c4fa675546ccb34d38b6e1c7139c" + ] }, + "id": "aMK540c0NYnH", + "outputId": "5b1b7988-fa18-4945-9244-de8ab900325d" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" - ] 
+ "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Set the HF_TOKEN environment variable\n", - "# import os\n", - "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", - "\n", - "# # Replace api_url with the url to your HF Spaces URL\n", - "# # Replace api_key if you configured a custom API key\n", - "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", - "# api_key=\"admin.apikey\",\n", - "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", - "# )" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8235294117647058\n", + "CPU times: user 5min 41s, sys: 1.29 s, total: 5min 42s\n", + "Wall time: 1min 33s\n" + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "VgNib6EDNYm-" - }, - "source": [ - "Finally, let's include the imports we need:" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\r" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "\n", + "def classify_text(sample):\n", + " label = classifier(sample[\"page_description\"], candidate_labels=labels)[\"labels\"][0]\n", + " sample[\"bart_zero_shot\"] = labels.index(label)\n", + " return sample\n", + "\n", + "\n", + "test_dataset = test_dataset.map(classify_text)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8235294117647058\n" + ] + } + ], + "source": [ + "zero_shot_text_accuracy = accuracy_score(\n", + " test_dataset[\"label\"], test_dataset[\"clip_zero_shot\"]\n", + ")\n", + "print(f\"Accuracy: {zero_shot_text_accuracy}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "IhAmyXNGNYnH" + }, + "source": [ + "😞 Text classification takes less time, but accuracy is also less at __.79__. 
This shows that some information is held in images that is not in the text. It would be great if we could consolidate this information. 🤞\n", + "\n", + "Furthermore, both of these approaches use large language models that consume a significant amount of computation." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "s_ecJF0ZNYnI" + }, + "source": [ + "## Consolidate data labeling\n", + "\n", + "The scores above from two zero-shot classification approaches reveal that the task is possible but challenging using a zero-shot approach. \n", + "\n", + "With (our modified) Argilla, we can re-label the dataset and combine the information from image and text. Then, we can perform few-shot learning on the dataset.\n", + "\n", + "Spoiler: this should give us a better score than the zero-shot approaches, by combining information in image and text. Furthermore, our resulting language model should have lower latency than the zero-shot models." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bulk Labeling with embeddings\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 📷 Images\n", + "\n", + "Now we can use a clip model to get image embeddings for the images in the dataset. We can then repeat the process of adding vectors to our dataset, but now with an `image_vectors` key." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load CLIP model for image embedding\n", + "image_encoder = SentenceTransformer(\"clip-ViT-B-32\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def encode_image(image_url):\n", + " # utility function to encode image\n", + " image = Image.open(get(image_url, stream=True).raw)\n", + " vector = image_encoder.encode(image).tolist()\n", + " return vector\n", + "\n", + "\n", + "# Encode the image of each sample (one sample per call, not batched)\n", + "dataset = dataset.map(\n", + " lambda sample: {\"image_vectors\": encode_image(sample[\"image_url\"])}\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "2pkY0NZ5NYnI" + }, + "source": [ + "### 📚 Text\n", + "\n", + "With Argilla, we can annotate samples using semantic search and the 'find similar' button. There's a complete tutorial on this [here](labelling-textclassification-sentence-transformers-semantic.ipynb). 
It requires the recently added Similarity search features.\n", + "\n", + "![Argilla with images tutorial](https://docs.v1.argilla.io/en/latest/_static/reference/webapp/features-similaritysearch.png)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "w0JW4z8nNYnI" + }, + "outputs": [], + "source": [ + "# Define sentence transformers model for text embedding\n", + "dataset = load_dataset(ELECTRONICS_DATASET, streaming=True, split=\"unlabelled\")\n", + "encoder = SentenceTransformer(\"all-MiniLM-L6-v2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "BmZPHluTNYnJ" + }, + "outputs": [], + "source": [ + "# Encode text field using batched computation\n", + "dataset = dataset.map(\n", + " lambda batch: {\"text_vectors\": encoder.encode(batch[\"page_name\"]).tolist()},\n", + " batch_size=32,\n", + " batched=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload to Argilla\n", + "\n", + "We can upload multiple vectors to Argilla. We just need to use separate keys. We will use `image_vectors` and `text_vectors`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# Turn vectors into a dictionary\n", + "dataset = dataset.map(\n", + " lambda r: {\"vectors\": {\"image\": r[\"image_vectors\"], \"text\": r[\"text_vectors\"]}},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 84, + "referenced_widgets": [ + "a07c719a0ffc41f9a0e8227562dc69d1", + "dc03df51df3845edac3492bc3ee3391e", + "ebf96889240d44f0b5a16fd945090213", + "c011f7d4aa7b4f66997d1c88804afacf", + "dcadb8f0088d43f6bc1a2cdc72cff730", + "b73e9fcc9e634c2780cc880cc65cc678", + "57ed9e91eb2f45a18ee08f6989bec0a4", + "4ae8401caf984baf9fc132490f11646a", + "e145a20bac85423a8ac6a1841f7ff4d7", + "0b4387ee1a0d4f769b391dcbc4d5477c", + "b8da2402e8e245d2b5ff592f602f138e" + ] + }, + "id": "gaEQm1BdNYnK", + "outputId": "0dfb1340-a685-4071-a4a1-71e6aa42463a" + }, + "outputs": [], + "source": [ + "# we need to set the metadata field length to 200 for longer urls\n", + "os.environ[\"ARGILLA_METADATA_FIELD_LENGTH\"] = \"200\"\n", + "\n", + "# instantiate Argilla records with vectors\n", + "records = [\n", + " rg.TextClassificationRecord(\n", + " text=sample[\"page_name\"],\n", + " metadata=dict(_image_url=sample[\"image_url\"]),\n", + " vectors=sample[\"vectors\"],\n", + " )\n", + " for sample in dataset\n", + "]\n", + "dataset_rg = rg.DatasetForTextClassification(records)\n", + "\n", + "# upload records with vectors to Argilla\n", + "rg.log(\n", + " records=dataset_rg,\n", + " name=\"electronics_with_vectors\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "I5G3haEnNYnL" + }, + "source": [ + "![Screenshot_20230224_214133.png]()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-x1c0NC4NYnO" + }, + "source": [ + "## Fewshot Classification\n", + "\n", + "We can now use our newly labelled dataset 
to train a classifier. We will use a SetFit model due to the limited sample count. Notice the significantly reduced inference time and increased accuracy. \n", + "\n", + "A complete tutorial on few-shot classification with SetFit and Argilla can be found [here](labelling-textclassification-setfit-zeroshot.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M3rtb_KHNYnO", + "outputId": "4858c080-44e4-4d84-b22b-b01b3ce28b0f" + }, + "outputs": [], + "source": [ + "# load the 'newly' labelled dataset\n", + "dataset_rg = rg.load(\"electronics_with_vectors\")\n", + "labelled_dataset = dataset_rg.prepare_for_training(framework=\"transformers\")\n", + "# # To try the prelabelled slice from HF Hub\n", + "# labelled_dataset = load_dataset(ELECTRONICS_DATASET, split=\"labelled\")\n", + "# # To evaluate on the larger test set\n", + "# test_dataset = datasets.load_dataset(ELECTRONICS_DATASET, split=\"test\")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "xwTr038lNYnO", + "outputId": "70f363ab-78ec-4e3e-c009-fd266968ca30" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import pprint as pp\n", - "from requests import get\n", - "\n", - "from datasets import load_dataset\n", - "from PIL import Image\n", - "from sklearn.metrics import accuracy_score\n", - "from sentence_transformers import SentenceTransformer\n", - "from transformers import pipeline\n", - "from sentence_transformers.losses import CosineSimilarityLoss\n", - "from setfit import SetFitModel, SetFitTrainer\n", - "from PIL import Image" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. 
You should TRAIN this model on a downstream task to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "# Load SetFit model from Hub\n", + "model = SetFitModel.from_pretrained(\"sentence-transformers/paraphrase-mpnet-base-v2\")\n", + "\n", + "# Create trainer\n", + "trainer = SetFitTrainer(\n", + " model=model,\n", + " train_dataset=labelled_dataset,\n", + " eval_dataset=test_dataset,\n", + " loss_class=CosineSimilarityLoss,\n", + " batch_size=16,\n", + " num_iterations=10,\n", + " column_mapping={\"page_name\": \"text\", \"label\": \"label\"},\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Yr7j8UldNYnP" + }, + "source": [ + "Now let's train ✈" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 240, + "referenced_widgets": [ + "76252be98eed408a8f08fab0edca8d86", + "5c634542ed3940a1a8ac3f73b634e88d", + "b0721a5abbc044a59a9d119c65523ec3", + "822cf5f25bbb4845a8df5b09df0c88f5", + "d24e98b7ba64420588aeee6c0a7d787a", + "de16a90176c24ff8aa76eb0af1281d22", + "c9464f1ec9e64361b0e621c664514ada", + "74cce0696fd74f999b8476974ec74f30", + "cad144a0ae5948f5896d12f83c044dfd", + "e831d4f1ad4d4f5f97bc9569a5294ca9", + "71f5c4292d4f4564bb0bf5af9368163f", + "3eb4c6ae76fd4118be2e466e560f6746", + "993a0a13bc7a4deea5cded6df4e27fd1", + "6d0477f7d5744f55a7882de18b923101", + "9b506c9457f046d4a91395935a934790", + "429a122883f6429d90c994558fc50cfc", + "9411b438e2634a3ebd0d1617d2ec3a7b", + "ffbfe6debabc4f5c8cad8383d596946e", + "e94edd188d8942e6b7f01cb1b962ba46", + "7059d83f776f49c887879e870bb71ee8", + "a696789fe05c40b281a0090625e66004", + "02b23c74c75e425cafe0c231b2892c47" + ] }, + "id": "qqp69mUVNYnP", + "outputId": "dc1d2b88-a61d-4f78-fd98-c8c5510b7972" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Enable Telemetry\n", - "\n", - "We gain valuable insights from how you interact with our 
tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "Applying column mapping to training dataset\n", + "***** Running training *****\n", + " Num examples = 5040\n", + " Num epochs = 1\n", + " Total optimization steps = 315\n", + " Total train batch size = 16\n", + "Iteration: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 315/315 [00:53<00:00, 5.94it/s]\n", + "Epoch: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:53<00:00, 53.04s/it]\n", + "Applying column mapping to evaluation dataset\n", + "***** Running evaluation *****\n", + "Downloading builder script: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.20k/4.20k [00:00<00:00, 4.10MB/s]" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " from argilla.utils.telemetry import tutorial_running\n", - " tutorial_running()\n", - "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{'accuracy': 0.9117647058823529}\n" + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "izZIHEzYNYnB" - }, - "source": [ - "## A 'real-world' multimodal dataset\n", - "\n", - "The dataset samples contain a `page_name`, `page_descriptions`, and a `label`. The dataset is split into two parts: `labelled` and `unlabelled`. The labelled portion is the result of my annotation so we can test methods. In reality, let's say this doesn't exist 😏. 
" + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "trainer.train()\n", + "metrics = trainer.evaluate()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9117647058823529\n" + ] + } + ], + "source": [ + "fewshot_relabelled_text_accuracy = metrics[\"accuracy\"]\n", + "pp.pprint(fewshot_relabelled_text_accuracy)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TLEiVC_KNYnP" + }, + "source": [ + "## Summary\n", + "\n", + "In this tutorial, we have learned to bulk-label a multi-modal dataset using a modified version of Argilla. We compared a few-shot classifier trained on the bulk-labelled dataset with zero-shot classifiers of image and text. The results show that the few-shot classifier is able to achieve a higher accuracy than the zero-shot classifiers. Furthermore, the SetFit model is significantly faster than the zero-shot classifiers.\n", + "\n", + "This approach can be applied to classification tasks with limited data and can be used to train a classifier with minimal human effort.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 438, - "referenced_widgets": [ - "d2cf4d1133a2421aa1ec980bdca4fecd", - "bcfc02b30e024b5c81856bcd9e191d07", - "eaefc3e261a7453cb66905079b71f16a", - "ac546920996e4ca081e438999ae19fc8", - "c54e39be37434e0d826d86041ef1de96", - "f4d6914361f44912b0d7ca1b16496606", - "b3c787a117ae42738c315c5cdabd508c", - "edf41d1718984e7dbef2b65799935fd8", - "20dad493a55343efaabbc6f7c1966cf0", - "b91ae0e5a43143e68f53099e85f1afc2", - 
"a1ec5941f87f4b23bc72188313ac9275", - "6482c21eb5d449f795a9e39fcae6a46e", - "2db4287290eb4c25bc440fc3f2f1d258", - "b470309f2a6b4878852ae9e589764edf", - "63de802f18e44b16a3a2f5029ad7b705", - "670f91d2101c4d87997c228210646f89", - "d8f11947b0d048489bdd649479c50b1f", - "70eee61c4d3842f69569dee3ac8773fa", - "26175752e18140b4a944bfc55f77a069", - "c13ea0c47f6e48f0a804fa9f2f5ce544", - "9e17b92da1aa43d0aa980db275437fde", - "d2186c7599164e528aa3d92f6999a9bb", - "35f1224563044be39e254342c6c5b635", - "87a472fb7193452faea139ab6bf1c9be", - "eb9b58dab8044074b1cac3e39f26e561", - "6359430089654ca4bdd587c125b317a0", - "be06bfcbe41040369d025626b8227b6b", - "082dfef575c94327a5377c3f7278cf5a", - "84d1a26c298b42cd820bcd2e4302a443", - "0987a64b9cea4850998e78d76d31c3b8", - "fa0573f2ebd84a5ba5ebdc0452362887", - "e8e295d7292245fb9f17413599d59960", - "0286aff9c56b43e7abd15f17dd33d6db", - "8b161e12449f4078bb64245518bb596d", - "71181e8f913b412fb72917c55a151723", - "dcd3ba169df34a0395eb5d03405f8835", - "d0b05902a4db4abd8c10cab6178a59ec", - "8294163893bc40c2bef26032babb3938", - "735a35f35dee4d548cd6284f9e3e1f5a", - "b7dfa3f0d2374482af6dc25ec8dd65df", - "c65f5b1c90e24d47a060c0b362749b82", - "01bc8315e7cd4064a8653eae44cea192", - "36156c14c8d544deab0ea65ac5ba7508", - "15d85f4fe15545bc85d3717d7af7bbb1", - "f8d66c452a6d4347b9a8247ad135fd35", - "3c8aa1d02f1541bdb59b0a8dff27148f", - "88677a526eae40a1b5e0b5e9e7a25218", - "eaf5ad9e9e3240799128ef8bbf0a195a", - "48f63d3af7604e8abe4ca94dbae63f48", - "27d3f1981c244549901253b3266de1ff", - "9aae9636296a4245989388403103e0eb", - "139f9e8c84f447b985c2080c17d14a8c", - "1d37c4be099b44b5bc67c2cd976693b9", - "1ad0c7637293497a9a75bfa45c1a06e8", - "4fc202bfa55842b48e59d4a800026331", - "bbb9442a3ec1463790bcf6533f3be905", - "16bd920f956f43b3921b79cec4d76d87", - "c12bc29016124339a6c3cb11f21bfaf0", - "a6b61668ad1146e78c2a9b2556b23703", - "14cc3d3e93c54226bc2f38a4745c1d23", - "16fccf90f2974a938167677b189b67a2", - "714590bcc27e4efc837f870812929f3e", - 
"9cef8cafa31a47fd9f7410478b5415e8", - "2e4deb55d7b74e2fa09c9f925fd2e751", - "2433cd897d734c78b3d4a62602865415", - "9fac34a2d3e44c95a2591b26ddae5c1e", - "88edaefb70c34854ad6f16d5a82b40e7", - "ba93d6edfed5434397d869ba2bd7aca5", - "7282d9ef9a754bcfb1297081296aed5a", - "946c67b0a3f54ce09c35db7bef44991c", - "f41cf420defb4b109f836fecea8e1d15", - "c10b310f7f6344b39c8d165c887d28f1", - "052b62b792fc4f1183ffb91f18d655c7", - "c4a54f5262014b77964d42429d5e43bf", - "cf540757bc79428bab170e157eb9381f", - "03390d37eba24242ae6c2099d5fe7863", - "88901ee5ca86458288bf82a22db7e379", - "498becb6513a44a4a17cff76a8ae4666", - "5066a8fd18d94f9da0aa0cf354d13452", - "a8d41a62f1b94575a744c12a399de8c8", - "e8cc8dbe0d164a9eaec472ffe1351db9", - "0d3cbcf01ca14063be3ae129db78fdcb", - "1a951db095d441e295bb9ff00d4dba30", - "e120b072886b4d4f94ef506ca3d6a605", - "d5fea5e89ebd4d298c169526eadeeb32", - "8513e53c07c04d75999be1fec6d5b491", - "e7da6c65cec140c3801fca68bcfdebcc", - "eccef3d9147242d9a1e42f4abd0ebe4c", - "b77605d5afb94b8cb579f3c690ea4203", - "aedab95f1bb144c38180f2b2e71c14ff", - "49d11634ca864c04bc87e472d3563e73", - "20163aeba069412eabd3b506a339667f", - "feef17ea90ae4105b2ffb56841ed2adb", - "e97c5c6e969842468f53520dfba1ee8f", - "89b04536243a48049eb777f00fe49555", - "94502dbbc1ab42f48d2d375a8265dbb4", - "090c3e60ff0d4e5fa930767672802d86", - "b577441dde9e43cb9644244ca0cf336e", - "b850f11900ae4874963b35e0b86470e5", - "963fbb3d5b9f4848a8cadf2a2d2b264b", - "52ad436eea8649058457d6380f618ea2", - "5817239aa98046e9be3736688f8c1a5f", - "c48e02dc87d74d50ba6de4aefeb1441e", - "ecc225aab6804d97b1cdd383621c9695", - "fcfda905fd4448b3940b04f4136c53a4", - "16162c715d4745f3aa91c7f38a78a32d", - "4c7287d821f1471e9ebdafa2f8229d28", - "5a65206e09c04176bfd33bcb31cd1ad5", - "9ac85729093c41c1b2a0fef5191b8ab5", - "d774f350171b4c3fb6be768dbf11c726" - ] - }, - "id": "gdVdxJt-NYnC", - "outputId": "c54aa613-806e-42fa-b4de-f374c78b1d50" - }, - "outputs": [], - "source": [ - "ELECTRONICS_DATASET = \"burtenshaw/electronics\"\n", 
- "dataset = load_dataset(ELECTRONICS_DATASET)\n", - "labels = dataset[\"labelled\"].features[\"label\"].names\n", - "int2str = dataset[\"labelled\"].features[\"label\"].int2str" + "data": { + "image/png": "", + "text/plain": [ + "
" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pandas import Series\n", + "\n", + "Series(\n", + " dict(\n", + " zero_shot_image_accuracy=zero_shot_image_accuracy,\n", + " zero_shot_text_accuracy=zero_shot_text_accuracy,\n", + " fewshot_relabelled_text_accuracy=fewshot_relabelled_text_accuracy,\n", + " )\n", + ").plot.bar()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "machine_shape": "hm", + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "01939245f8874aaca606e7a1e8b954a8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57b9cf8fe2fa4d918e8dc15e2efb6153", + "placeholder": "​", + "style": "IPY_MODEL_4f166794a0ad4061878ccfc623ec83c0", + "value": " 252/252 [00:02<00:00, 101.31it/s]" + } }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'image_url': 'https://tse1.mm.bing.net/th?id=OIP.to3Cddhws6ECl-_ySZ5ShQHaFi&pid=Api',\n", - " 'label': 1,\n", - " 
'page_description': '\\n'\n", - " '\\n'\n", - " 'Are you looking for a way to reduce the number of '\n", - " 'purchase orders you need to place for cable assemblies? '\n", - " \"If so, then this guide is for you! We'll show you how to \"\n", - " 'source cable assemblies with fewer purchase orders, '\n", - " \"saving you time and money. We'll cover topics such as \"\n", - " 'understanding the different types of cable assemblies, '\n", - " 'researching suppliers, and negotiating the best prices. '\n", - " \"We'll also provide tips on how to streamline the \"\n", - " 'ordering process and ensure you get the best quality '\n", - " \"products. With this guide, you'll be able to source \"\n", - " 'cable assemblies with fewer purchase orders and get the '\n", - " 'most out of your budget.',\n", - " 'page_name': 'How to Source Cable Assemblies With Fewer Purchase Orders ...'}\n" - ] - } + "01bc8315e7cd4064a8653eae44cea192": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0286aff9c56b43e7abd15f17dd33d6db": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "02b23c74c75e425cafe0c231b2892c47": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "02f439d46b384718b0c7ab5fd8aa2d03": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03390d37eba24242ae6c2099d5fe7863": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": 
null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "046bafaebfd14a6282e231c6a7c22b3e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "052b62b792fc4f1183ffb91f18d655c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "05c50612c7ec41c7877d85c9d0502c0b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "082dfef575c94327a5377c3f7278cf5a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "088db72b2c4a41adb5b6f543c4fd4070": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_47a715f99b224f41b21d9ddc7467df52", + "placeholder": "​", + "style": "IPY_MODEL_b179999ec76e48579fd195b7c788f366", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "090c3e60ff0d4e5fa930767672802d86": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "094d2d5dc22d4f93918468209abbca2e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "097dc95081d04f7e9f803384b5dee1ea": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5748285ca2ba4b93b30621437e931345", + "placeholder": "​", + "style": "IPY_MODEL_3017a4909cfe49648a9478ea14b9d062", + "value": "Downloading (…)ce_transformers.json: 100%" + } + }, + "0987a64b9cea4850998e78d76d31c3b8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ab25a53cd28408c802a5dcda0e1206e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ad0c441b68a48c0a8fb9436050d2a1d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1183c04b5df54255bad175f72d4a9153", + "IPY_MODEL_4378864bd5004edcba65b59fecbe84d3", + "IPY_MODEL_6f63b713ac2b4358b939c2818d15e67f" ], - "source": [ - "# show a sample\n", - "pp.pprint(next(iter(dataset[\"labelled\"])))" - ] + "layout": "IPY_MODEL_89ef6f10348a408a8546452e1fa74528" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🔫 Zero-Shot Classification" - ] + "0b4387ee1a0d4f769b391dcbc4d5477c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - 
"attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "U9R2eaRXNYnC" - }, - "source": [ - "### 📷 Images\n", - "\n", - "First, we will explore some zero-shot techniques. For the sake of comparison, we will use the `labelled` portion of the dataset.\n" - ] + "0c28f7e5514c432197ed1b0e24fcc39b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SshZkW1eNYnD", - "outputId": "a59a4610-0e25-434a-a6fb-d33394293e57" - }, - "outputs": [], - "source": [ - "# to save time, we'll take a slice of the dataset\n", - "test_dataset = load_dataset(ELECTRONICS_DATASET, split=\"test[:20%]\")" - ] + "0d3cbcf01ca14063be3ae129db78fdcb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "r-2LBdNVNYnD", - "outputId": "6c3cb81f-95de-4530-d501-f8b43f031c7d" - }, - "outputs": [], - "source": [ - "# More models in the model hub.\n", - "model_name = \"openai/clip-vit-large-patch14\"\n", - "classifier = pipeline(\"zero-shot-image-classification\", model = model_name)" - ] + "0d4090c9cfd542cba1a14d30b154f733": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": 
null, + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "FodfnkJgNYnE" - }, - "source": [ - "First, we can zero-shot classify one image from the dataset: " - ] + "0f590a3e9ed148c9b076fa3b16e287f8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 509 - }, - "id": "5FMETGuDNYnE", - "outputId": "508eb028-acdb-428c-a7b1-8f34e2cbdb79" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'label': 'switches', 'score': 0.9631496667861938}\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } + "0f993203b2004905992240787135ccb7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "11064b742bf34b45bbeb67040759fe94": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1183c04b5df54255bad175f72d4a9153": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7451e0901e6f40f5971359c9ac5d47bb", + "placeholder": "​", + "style": "IPY_MODEL_fe63dbf326194c6c813327bd643a85cd", + "value": "100%" + } + }, + "11dd9c8f6fa744cfbf25ce25f3c8291b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dd355cdd1594499bbccc8a7842a0ba93", + "placeholder": "​", + "style": "IPY_MODEL_435b1f4a70694161a749af8ea40baabb", + "value": "Downloading (…)cad52eb/modules.json: 100%" + } + }, + "12da828a56fd47fbac3881a78ae3b40c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_20ba960a7c5d4a4fa43ee8c90c412b06", + "max": 690, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_27d6c644f03a4874a0a0589cce8a9624", + "value": 690 + } + }, + "12e8ab3cc4fe40e7bbb056b3354603bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "13942a35ebd3432d9e3a107c54af9619": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bebd64b52d24437dabeb5119235853d7", + "placeholder": "​", + "style": "IPY_MODEL_094d2d5dc22d4f93918468209abbca2e", + "value": "Downloading (…)rocessor_config.json: 100%" + } + }, + "139f9e8c84f447b985c2080c17d14a8c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "14b5a9186b3a47108fafa070c06ab35c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f7d2ce646a4c42ed893383a4f75af2d6", + "placeholder": "​", + "style": "IPY_MODEL_2a967808803e44fea67bd9958542eb43", + "value": "100%" + } + }, + "14cc3d3e93c54226bc2f38a4745c1d23": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + 
"right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "15d85f4fe15545bc85d3717d7af7bbb1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "16162c715d4745f3aa91c7f38a78a32d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "16798f3b9a934353ae8e11cdd22820ca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0f993203b2004905992240787135ccb7", + "placeholder": "​", + "style": "IPY_MODEL_e8756ee3f2f64e01b2790714b6704584", + "value": " 604/604 [00:00<00:00, 39.1kB/s]" + } + }, + "16bd920f956f43b3921b79cec4d76d87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_16fccf90f2974a938167677b189b67a2", + "placeholder": "​", + "style": "IPY_MODEL_714590bcc27e4efc837f870812929f3e", + "value": "Extracting data files: 100%" + } + }, + "16fccf90f2974a938167677b189b67a2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1a951db095d441e295bb9ff00d4dba30": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1ad0c7637293497a9a75bfa45c1a06e8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1d37c4be099b44b5bc67c2cd976693b9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1e2da1ce24984300af0b7246ebd59b14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "20163aeba069412eabd3b506a339667f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b577441dde9e43cb9644244ca0cf336e", + "placeholder": "​", + "style": "IPY_MODEL_b850f11900ae4874963b35e0b86470e5", + "value": " 0/? [00:00<?, ? 
tables/s]" + } + }, + "20ba960a7c5d4a4fa43ee8c90c412b06": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20dad493a55343efaabbc6f7c1966cf0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2108640a73e44fe28bd0445abdb2048d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_11064b742bf34b45bbeb67040759fe94", + "max": 316, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_83569f1993e44a439c4c99d5d58251f2", + "value": 316 + } + }, + "22a1ecc265c7419b99b89dc635351163": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2433cd897d734c78b3d4a62602865415": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2587fb3caed54a10829e8ba594510e7a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c59a11b4cdcd45a181e5417839a21245", + "placeholder": "​", + "style": "IPY_MODEL_b9850f02e9ec4069aa836d9ed11aa019", + "value": " 316/316 [00:00<00:00, 19.1kB/s]" + } + }, + "26175752e18140b4a944bfc55f77a069": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2685384c30bb4865b7f14f62fbe516a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "27d3f1981c244549901253b3266de1ff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "27d6c644f03a4874a0a0589cce8a9624": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2a967808803e44fea67bd9958542eb43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2db4287290eb4c25bc440fc3f2f1d258": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d8f11947b0d048489bdd649479c50b1f", + "placeholder": "​", + "style": "IPY_MODEL_70eee61c4d3842f69569dee3ac8773fa", + "value": "Downloading data files: 100%" + } + }, + "2e4deb55d7b74e2fa09c9f925fd2e751": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2f093aa6d36c4622bd09d1253bf6c3d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": 
null, + "width": null + } + }, + "2f87bde2c64b42bc90ed461265cfe1f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2f883c076ee7470481eb7d0fcb25f685": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3017a4909cfe49648a9478ea14b9d062": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "34453701eb2f40df9a2a0154c02990d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fadb08eafb084a4e91f8406055beb502", + "placeholder": "​", + "style": "IPY_MODEL_8aae0197e477451a890b1f93cdeebe10", + "value": " 34/34 [02:30<00:00, 4.40s/ex]" + } + }, + "35a4c4fa675546ccb34d38b6e1c7139c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "35c4bd29723b46ad94b098685953aefb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8c72c24c20b84bd4a007929384e91b78", + "placeholder": "​", + "style": "IPY_MODEL_515f4f3435ef468499acd8e48daf709b", + "value": " 
4.03k/4.03k [00:00<00:00, 250kB/s]" + } + }, + "35f1224563044be39e254342c6c5b635": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_87a472fb7193452faea139ab6bf1c9be", + "IPY_MODEL_eb9b58dab8044074b1cac3e39f26e561", + "IPY_MODEL_6359430089654ca4bdd587c125b317a0" ], - "source": [ - "image_to_classify = next(iter(dataset[\"test\"]))[\"image_url\"]\n", - "scores = classifier(image_to_classify, candidate_labels = labels)\n", - "# show a sample\n", - "pp.pprint(scores[0])\n", - "Image.open(get(image_to_classify, stream =True).raw)" - ] + "layout": "IPY_MODEL_be06bfcbe41040369d025626b8227b6b" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "vwkVzNhVNYnF" - }, - "source": [ - "Now we will test the accuracy and latency of a zero-shot image classifier on a sub-section of the `labelled` portion of the dataset." 
- ] + "36156c14c8d544deab0ea65ac5ba7508": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 102, - "referenced_widgets": [ - "a8ec0defdfe841caaf0caa4de07fb84a", - "14b5a9186b3a47108fafa070c06ab35c", - "3916f72e314d46e883db7e529210b098", - "34453701eb2f40df9a2a0154c02990d4", - "9e98837e01004eba948e9c2d3ea23e5f", - "f7d2ce646a4c42ed893383a4f75af2d6", - "2a967808803e44fea67bd9958542eb43", - "540bda296c904aa48c10db351a11ca66", - "c1f28cff3e064e46a8c9bdd0d705a099", - "fadb08eafb084a4e91f8406055beb502", - "8aae0197e477451a890b1f93cdeebe10" - ] - }, - "id": "j_cJkBbbNYnF", - "outputId": "a228a4b3-4ba5-4014-8a6d-7166a688553a" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
" " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9min 20s, sys: 1.19 s, total: 9min 21s\n", - "Wall time: 2min 28s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r" - ] - } + "3637b3da70d34d529d48d630baeaf13b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_56d32c90b86c48b08d020ed3dcfd3a4e", + "placeholder": "​", + "style": "IPY_MODEL_2f883c076ee7470481eb7d0fcb25f685", + "value": "Downloading (…)cial_tokens_map.json: 100%" + } + }, + "375cbfe47f9f4eadaf1294be69b16dfd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_088db72b2c4a41adb5b6f543c4fd4070", + "IPY_MODEL_51f3dbca5cb04c818bb65cf4adc7914d", + "IPY_MODEL_16798f3b9a934353ae8e11cdd22820ca" ], - "source": [ - "%%time\n", - "def classify_image(sample):\n", - " label = classifier(sample[\"image_url\"], candidate_labels = labels)[0][\"label\"]\n", - " sample[\"clip_zero_shot\"] = labels.index(label)\n", - " return sample\n", - "\n", - "test_dataset = test_dataset.map(classify_image)" - ] + "layout": "IPY_MODEL_97ff6a0b5366407c8138eedd05532d77" + } }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - 
{ - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy: 0.8235294117647058\n" - ] - } + "3916f72e314d46e883db7e529210b098": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_540bda296c904aa48c10db351a11ca66", + "max": 34, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c1f28cff3e064e46a8c9bdd0d705a099", + "value": 34 + } + }, + "39555af32eeb44bfa49b04aa493320ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3c370e064d684a9f94ca552bbcc893d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b13577cd8ab54745b1f75619740f7e8d", + "max": 524619, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_6b7bd8943d824ad586939a5279368a0d", + "value": 524619 + } + }, + "3c8aa1d02f1541bdb59b0a8dff27148f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_27d3f1981c244549901253b3266de1ff", + "placeholder": "​", + "style": "IPY_MODEL_9aae9636296a4245989388403103e0eb", + "value": "Downloading data: 100%" + } + }, + "3e50350e1d3d491ababa8cd3764be1d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_75686e22371d4d3babec811f49d280f9", + "max": 1875, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_70d08a9270294f8380cc7c740700ca8c", + "value": 1875 + } + }, + "3e71940409664a86a2be2c0b86b10d64": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": 
null, + "layout": "IPY_MODEL_0c28f7e5514c432197ed1b0e24fcc39b", + "max": 605266175, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c41c679d7f0341f3839b9abc5329ec08", + "value": 605266175 + } + }, + "3eb4c6ae76fd4118be2e466e560f6746": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_993a0a13bc7a4deea5cded6df4e27fd1", + "IPY_MODEL_6d0477f7d5744f55a7882de18b923101", + "IPY_MODEL_9b506c9457f046d4a91395935a934790" ], - "source": [ - "zero_shot_image_accuracy = accuracy_score(test_dataset[\"label\"], test_dataset[\"clip_zero_shot\"])\n", - "print(f\"Accuracy: {zero_shot_image_accuracy}\")" - ] + "layout": "IPY_MODEL_429a122883f6429d90c994558fc50cfc" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "sgnz7XtXNYnG" - }, - "source": [ - "😞 Zero-shot image classification with a CLIP model gives an accuracy of **0.82** in just under **2 minutes** for only **20%** of the test data. This score is not impressive. Let's see if the text is more reliable." 
- ] + "3eec352c0eea47e9b6e419785750df61": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "OEV0FJUmNYnG" - }, - "source": [ - "### 📚 Text\n", - "The product description and name also contain valuable information. Let's see what zero-shot classification of those can achieve. " - ] + "3f343b79e06e4d5d86cf0ef82729e769": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3637b3da70d34d529d48d630baeaf13b", + "IPY_MODEL_56aaec4fab6a47b18e147b2cd0c53c49", + "IPY_MODEL_a7378faa28e44fb19ecdafc7124acce2" + ], + "layout": "IPY_MODEL_b697802729d44712bfe68c0ef4f47cd8" + } }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "id": "edJSqNnaNYnG" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading (…)lve/main/config.json: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.15k/1.15k [00:00<00:00, 711kB/s]\n", - "Downloading pytorch_model.bin: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.63G/1.63G [00:06<00:00, 243MB/s]\n", - "Downloading 
(…)okenizer_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26.0/26.0 [00:00<00:00, 16.5kB/s]\n", - "Downloading (…)olve/main/vocab.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 899k/899k [00:02<00:00, 401kB/s]\n", 
- "Downloading (…)olve/main/merges.txt: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 456k/456k [00:00<00:00, 1.40MB/s]\n", - "Downloading (…)/main/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.36M/1.36M [00:00<00:00, 
2.81MB/s]\n" - ] - } + "405da6f282174a00a302d69a17634ca8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "429a122883f6429d90c994558fc50cfc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "432654cc3aff41019a5df2f5dbcab6b3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "435b1f4a70694161a749af8ea40baabb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "435b2d9a31af4b479bca791932a9315f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4378864bd5004edcba65b59fecbe84d3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5d9977f50b704bd5b4a371387cc71a17", + "max": 34, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c7d383a298da44e491c81887632a89ae", + "value": 34 + } + }, + "4477e281c40c41f9a5c076e30640460e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_fd56fa76c93c48b2bc179cb0a7bffac6", + "max": 122, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e444beb58b7c4ad1aa94959ef0b5bd52", + "value": 122 + } + }, + "4666546df28e4dc7b6c7d599cac5e26c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "47a715f99b224f41b21d9ddc7467df52": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48a8c0d648694b8abddf07ed12b68a93": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4666546df28e4dc7b6c7d599cac5e26c", + "placeholder": "​", + "style": "IPY_MODEL_1e2da1ce24984300af0b7246ebd59b14", + "value": "Downloading (…)LIPModel/config.json: 100%" + } + }, + "48f63d3af7604e8abe4ca94dbae63f48": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "498becb6513a44a4a17cff76a8ae4666": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5066a8fd18d94f9da0aa0cf354d13452", + "IPY_MODEL_a8d41a62f1b94575a744c12a399de8c8", + "IPY_MODEL_e8cc8dbe0d164a9eaec472ffe1351db9" ], - "source": [ - "classifier = pipeline(model=\"facebook/bart-large-mnli\")" - ] + "layout": "IPY_MODEL_0d3cbcf01ca14063be3ae129db78fdcb" + } }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 102, - "referenced_widgets": [ - "0ad0c441b68a48c0a8fb9436050d2a1d", - "1183c04b5df54255bad175f72d4a9153", - "4378864bd5004edcba65b59fecbe84d3", - "6f63b713ac2b4358b939c2818d15e67f", - "89ef6f10348a408a8546452e1fa74528", - "7451e0901e6f40f5971359c9ac5d47bb", - "fe63dbf326194c6c813327bd643a85cd", - "5d9977f50b704bd5b4a371387cc71a17", - "c7d383a298da44e491c81887632a89ae", - "4c46328ead39408995ba5a9837d57e94", - "35a4c4fa675546ccb34d38b6e1c7139c" - ] - }, - "id": "aMK540c0NYnH", - "outputId": 
"5b1b7988-fa18-4945-9244-de8ab900325d" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy: 0.8235294117647058\n", - "CPU times: user 5min 41s, sys: 1.29 s, total: 5min 42s\n", - "Wall time: 1min 33s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r" - ] - } + "49d11634ca864c04bc87e472d3563e73": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "info", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_94502dbbc1ab42f48d2d375a8265dbb4", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_090c3e60ff0d4e5fa930767672802d86", + "value": 1 + } + }, + "49ddd099c0ed4535b2f3da0e1dc5f44b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0ab25a53cd28408c802a5dcda0e1206e", + "placeholder": "​", + "style": "IPY_MODEL_4ade3df272594fb68ae463234503c638", + "value": "Downloading (…)d52eb/.gitattributes: 100%" + } + }, + "4a37959a5add495981fde226b229fb19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4ade3df272594fb68ae463234503c638": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4ae8401caf984baf9fc132490f11646a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, 
+ "4c46328ead39408995ba5a9837d57e94": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c7287d821f1471e9ebdafa2f8229d28": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f166794a0ad4061878ccfc623ec83c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4fc202bfa55842b48e59d4a800026331": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5066a8fd18d94f9da0aa0cf354d13452": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1a951db095d441e295bb9ff00d4dba30", + 
"placeholder": "​", + "style": "IPY_MODEL_e120b072886b4d4f94ef506ca3d6a605", + "value": "" + } + }, + "51307f0869b442d88a7a6b49bf7243b2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aea6c5629724499da38f8a06667f7a2e", + "IPY_MODEL_3e50350e1d3d491ababa8cd3764be1d1", + "IPY_MODEL_79e43db8a877400198f73dbab0105ade" ], - "source": [ - "%%time\n", - "def classify_text(sample):\n", - " label = classifier(sample[\"page_description\"], candidate_labels = labels)[\"labels\"][0]\n", - " sample[\"bart_zero_shot\"] = labels.index(label)\n", - " return sample\n", - "\n", - "test_dataset = test_dataset.map(classify_text)" - ] + "layout": "IPY_MODEL_046bafaebfd14a6282e231c6a7c22b3e" + } }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy: 0.8235294117647058\n" - ] - } + "515f4f3435ef468499acd8e48daf709b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "51f3dbca5cb04c818bb65cf4adc7914d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9bcb1bc9f0e2439b8754e9106eb0a802", + "max": 604, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2685384c30bb4865b7f14f62fbe516a0", + "value": 604 + } + }, + "52aa2bbbdd5246db8520ac2822fcbe54": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "52ad436eea8649058457d6380f618ea2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fcfda905fd4448b3940b04f4136c53a4", + "placeholder": "​", + "style": "IPY_MODEL_16162c715d4745f3aa91c7f38a78a32d", + "value": "100%" + } + }, + "534e82da7a5d47a58ae95ad7ebe1fc2e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_13942a35ebd3432d9e3a107c54af9619", + "IPY_MODEL_2108640a73e44fe28bd0445abdb2048d", + "IPY_MODEL_2587fb3caed54a10829e8ba594510e7a" ], - "source": [ - "zero_shot_text_accuracy = accuracy_score(test_dataset[\"label\"], test_dataset[\"clip_zero_shot\"])\n", - "print(f\"Accuracy: {zero_shot_text_accuracy}\")" - ] + "layout": "IPY_MODEL_661c517493da40f581fcceab85b5bee1" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "IhAmyXNGNYnH" - }, - "source": [ - "😞 Text classification takes less time, but accuracy is also less at __.79__. This shows that some information is held in images that are not in the text. It would be great if we could consolidate this information. 🤞\n", - "\n", - "Furthermore, both of these approaches use large language models that consume a significant amount of computation." 
- ] + "540bda296c904aa48c10db351a11ca66": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "s_ecJF0ZNYnI" - }, - "source": [ - "## Consolidate data labeling\n", - "\n", - "The scores above from two zero-shot classification approaches reveal that the task is possible but challenging using a zero-shot approach. \n", - "\n", - "With (our modified) Argilla, we can re-label the dataset and combine the information from image and text. Then, we can perform few-shot learning on the dataset.\n", - "\n", - "Spoiler: this should give us a better score than the zero-shot approaches, by combining information in image and text. Furthermore, our resulting language model should have lower latency than the zero-shot models." 
- ] + "56aaec4fab6a47b18e147b2cd0c53c49": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6f7d732ae96847809230720948fba435", + "max": 389, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c026c0d5a95d487f8315821c616d1279", + "value": 389 + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bulk Labeling with embeddings\n" - ] + "56d32c90b86c48b08d020ed3dcfd3a4e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": 
null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 📷 Images\n", - "\n", - "Now we can use a clip model to get image embeddings for the images in the dataset. We can then repeat the process of adding vectors to our dataset, but now with an `image_vectors` key." - ] + "5748285ca2ba4b93b30621437e931345": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load CLIP model for image embedding\n", - "image_encoder = SentenceTransformer('clip-ViT-B-32')" - ] + "57b9cf8fe2fa4d918e8dc15e2efb6153": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def encode_image(image_url):\n", - " # utility function to encode image\n", - " image = Image.open(get(image_url, stream =True).raw)\n", - " vector = image_encoder.encode(image).tolist()\n", - " return vector\n", - "\n", - "# Encode text field using batched computation\n", - "dataset = dataset.map(lambda sample: {\"image_vectors\": encode_image(sample[\"image_url\"])})" - ] + "57ed9e91eb2f45a18ee08f6989bec0a4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "2pkY0NZ5NYnI" - }, - "source": [ - "### 📚 Text\n", - "\n", - "With Argilla, we can annotate samples using semantic search and the 'find similar' button. There's a complete tutorial on this [here](labelling-textclassification-sentence-transformers-semantic.ipynb). It requires the recently added Similarity search features.\n", - "\n", - "![Argilla with images tutorial](https://docs.v1.argilla.io/en/latest/_static/reference/webapp/features-similaritysearch.png)\n", - "\n" - ] + "5817239aa98046e9be3736688f8c1a5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4c7287d821f1471e9ebdafa2f8229d28", + "max": 3, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5a65206e09c04176bfd33bcb31cd1ad5", + "value": 3 + } }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "id": "w0JW4z8nNYnI" - }, - "outputs": [], - "source": [ - "# Define sentence transformers model for text embedding\n", - "dataset = load_dataset(ELECTRONICS_DATASET, streaming=True, split=\"unlabelled\")\n", - "encoder = SentenceTransformer(\"all-MiniLM-L6-v2\")" - ] + "5a65206e09c04176bfd33bcb31cd1ad5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "id": "BmZPHluTNYnJ" - }, - "outputs": [], - "source": [ - "# Encode text field using batched computation\n", - "dataset = dataset.map(lambda batch: {\"text_vectors\": encoder.encode(batch[\"page_name\"]).tolist()}, batch_size=32, batched=True)" - ] + "5c634542ed3940a1a8ac3f73b634e88d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_de16a90176c24ff8aa76eb0af1281d22", + "placeholder": "​", + "style": "IPY_MODEL_c9464f1ec9e64361b0e621c664514ada", + "value": "Epoch: 100%" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Upload to Argilla\n", - "\n", - "We can upload multiple vectors to Argilla. We just need to use separate keys. We will use `image_vectors` and `text_vectors`." 
- ] + "5d9977f50b704bd5b4a371387cc71a17": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "# Turn vectors into a dictionary\n", - "dataset = dataset.map(\n", - " lambda r: {\"vectors\": {\"image\": r[\"image_vectors\"], \"text\": r[\"text_vectors\"]}},\n", - ")" - ] + "5e8c1c4d99654ff1b32732a7297871f7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": 
null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 84, - "referenced_widgets": [ - "a07c719a0ffc41f9a0e8227562dc69d1", - "dc03df51df3845edac3492bc3ee3391e", - "ebf96889240d44f0b5a16fd945090213", - "c011f7d4aa7b4f66997d1c88804afacf", - "dcadb8f0088d43f6bc1a2cdc72cff730", - "b73e9fcc9e634c2780cc880cc65cc678", - "57ed9e91eb2f45a18ee08f6989bec0a4", - "4ae8401caf984baf9fc132490f11646a", - "e145a20bac85423a8ac6a1841f7ff4d7", - "0b4387ee1a0d4f769b391dcbc4d5477c", - "b8da2402e8e245d2b5ff592f602f138e" - ] - }, - "id": "gaEQm1BdNYnK", - "outputId": "0dfb1340-a685-4071-a4a1-71e6aa42463a" - }, - "outputs": [], - "source": [ - "# we need to set the metadata field length to 200 for longer urls\n", - "os.environ[\"ARGILLA_METADATA_FIELD_LENGTH\"] = \"200\"\n", - "\n", - "# instantiate Argilla records with vectors\n", - "records = [\n", - " rg.TextClassificationRecord(\n", - " text=sample[\"page_name\"],\n", - " metadata=dict(_image_url=sample[\"image_url\"]),\n", - " vectors=sample[\"vectors\"]\n", - " )\n", - " for sample in dataset\n", - "]\n", - "dataset_rg = rg.DatasetForTextClassification(records)\n", - "\n", - "# upload recors with 
vectors to Argilla\n", - "rg.log(\n", - " records=dataset_rg,\n", - " name=\"electronics_with_vectors\",\n", - ")" - ] + "61e53dcae1c4457aa5089ae1e8d276ae": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a5a67282d5b14d3f803347ddd2ccc44f", + "placeholder": "​", + "style": "IPY_MODEL_b5a4880486774113bef9fc78d7bb4ed9", + "value": "100%" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "I5G3haEnNYnL" - }, - "source": [ - "![Screenshot_20230224_214133.png]()" - ] + "6359430089654ca4bdd587c125b317a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e8e295d7292245fb9f17413599d59960", + "placeholder": "​", + "style": "IPY_MODEL_0286aff9c56b43e7abd15f17dd33d6db", + "value": " 85.3k/85.3k [00:00<00:00, 286kB/s]" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "-x1c0NC4NYnO" - }, - "source": [ - "## Fewshot Classification\n", - "\n", - "We can now use our newly labelled dataset to train a classifier. We will use a SetFit model due to the limited sample count. Notice the significantly reduced inference time and increased accuracy. 
\n", - "\n", - "A complete tutorial on few-shot classification with SetFit and Argilla can be found [here](labelling-textclassification-setfit-zeroshot.ipynb)." - ] + "63de802f18e44b16a3a2f5029ad7b705": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9e17b92da1aa43d0aa980db275437fde", + "placeholder": "​", + "style": "IPY_MODEL_d2186c7599164e528aa3d92f6999a9bb", + "value": " 3/3 [00:05<00:00, 1.70s/it]" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "M3rtb_KHNYnO", - "outputId": "4858c080-44e4-4d84-b22b-b01b3ce28b0f" - }, - "outputs": [], - "source": [ - "# load the 'newly' labelled dataset \n", - "dataset_rg = rg.load(\"electronics_with_vectors\")\n", - "labelled_dataset = dataset_rg.prepare_for_training(framework=\"transformers\")\n", - "# # To try the prelabelled slice from HF Hub\n", - "# labelled_dataset = load_dataset(ELECTRONICS_DATASET, split=\"labelled\")\n", - "# # To evaluate on the larger test set\n", - "# test_dataset = datasets.load_dataset(ELECTRONICS_DATASET, split=\"test\")" - ] + "6482c21eb5d449f795a9e39fcae6a46e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_2db4287290eb4c25bc440fc3f2f1d258", + "IPY_MODEL_b470309f2a6b4878852ae9e589764edf", + "IPY_MODEL_63de802f18e44b16a3a2f5029ad7b705" + ], + "layout": "IPY_MODEL_670f91d2101c4d87997c228210646f89" + } }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "xwTr038lNYnO", - "outputId": "70f363ab-78ec-4e3e-c009-fd266968ca30" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.\n" - ] - } + "661c517493da40f581fcceab85b5bee1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "670f91d2101c4d87997c228210646f89": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "68f83424c07844f1b439cf9a2170818b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b7bd8943d824ad586939a5279368a0d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6d0477f7d5744f55a7882de18b923101": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e94edd188d8942e6b7f01cb1b962ba46", + "max": 315, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7059d83f776f49c887879e870bb71ee8", + "value": 315 + } + }, + "6f63b713ac2b4358b939c2818d15e67f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4c46328ead39408995ba5a9837d57e94", + "placeholder": "​", + "style": "IPY_MODEL_35a4c4fa675546ccb34d38b6e1c7139c", + "value": " 34/34 [01:36<00:00, 2.65s/ex]" + } + }, + "6f7d732ae96847809230720948fba435": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7059d83f776f49c887879e870bb71ee8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "707c06347f5444e185ca2bc7c07eecfa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "70d08a9270294f8380cc7c740700ca8c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "70eee61c4d3842f69569dee3ac8773fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "71181e8f913b412fb72917c55a151723": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_735a35f35dee4d548cd6284f9e3e1f5a", + "placeholder": "​", + "style": "IPY_MODEL_b7dfa3f0d2374482af6dc25ec8dd65df", + "value": "Downloading data: 100%" + } + }, + "713939a800704d4a8e95217c4b3c6d0e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ac8f808451764c4aac75914710521941", + "IPY_MODEL_3c370e064d684a9f94ca552bbcc893d0", + "IPY_MODEL_a5fc404735384abba31ed330ad4d4985" ], - "source": [ - "# Load SetFit model from Hub\n", - "model = SetFitModel.from_pretrained(\"sentence-transformers/paraphrase-mpnet-base-v2\")\n", - "\n", - "# Create trainer\n", - "trainer = SetFitTrainer(\n", - " model=model,\n", - " train_dataset=labelled_dataset,\n", - " eval_dataset=test_dataset,\n", - " loss_class=CosineSimilarityLoss,\n", - " batch_size=16,\n", - " num_iterations=10,\n", - " column_mapping={\"page_name\":\"text\", 
\"label\":\"label\"}\n", - ")\n" - ] + "layout": "IPY_MODEL_f6e1dadaed924874a4c12e4759921e36" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "Yr7j8UldNYnP" - }, - "source": [ - "Now let's train ✈" - ] + "714590bcc27e4efc837f870812929f3e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 240, - "referenced_widgets": [ - "76252be98eed408a8f08fab0edca8d86", - "5c634542ed3940a1a8ac3f73b634e88d", - "b0721a5abbc044a59a9d119c65523ec3", - "822cf5f25bbb4845a8df5b09df0c88f5", - "d24e98b7ba64420588aeee6c0a7d787a", - "de16a90176c24ff8aa76eb0af1281d22", - "c9464f1ec9e64361b0e621c664514ada", - "74cce0696fd74f999b8476974ec74f30", - "cad144a0ae5948f5896d12f83c044dfd", - "e831d4f1ad4d4f5f97bc9569a5294ca9", - "71f5c4292d4f4564bb0bf5af9368163f", - "3eb4c6ae76fd4118be2e466e560f6746", - "993a0a13bc7a4deea5cded6df4e27fd1", - "6d0477f7d5744f55a7882de18b923101", - "9b506c9457f046d4a91395935a934790", - "429a122883f6429d90c994558fc50cfc", - "9411b438e2634a3ebd0d1617d2ec3a7b", - "ffbfe6debabc4f5c8cad8383d596946e", - "e94edd188d8942e6b7f01cb1b962ba46", - "7059d83f776f49c887879e870bb71ee8", - "a696789fe05c40b281a0090625e66004", - "02b23c74c75e425cafe0c231b2892c47" - ] - }, - "id": "qqp69mUVNYnP", - "outputId": "dc1d2b88-a61d-4f78-fd98-c8c5510b7972" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Applying column mapping to training dataset\n", - "***** Running training *****\n", - " Num examples = 5040\n", - " Num 
epochs = 1\n", - " Total optimization steps = 315\n", - " Total train batch size = 16\n", - "Iteration: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 315/315 [00:53<00:00, 5.94it/s]\n", - "Epoch: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:53<00:00, 53.04s/it]\n", - "Applying column mapping to evaluation dataset\n", - "***** Running evaluation *****\n", - "Downloading builder script: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.20k/4.20k [00:00<00:00, 4.10MB/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'accuracy': 0.9117647058823529}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } + "71f5c4292d4f4564bb0bf5af9368163f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7282d9ef9a754bcfb1297081296aed5a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "info", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c4a54f5262014b77964d42429d5e43bf", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cf540757bc79428bab170e157eb9381f", + "value": 1 + } + }, + "735a35f35dee4d548cd6284f9e3e1f5a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "742743267970469b8b358f051aeb3d7f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7451e0901e6f40f5971359c9ac5d47bb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74cce0696fd74f999b8476974ec74f30": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74d53a9e164f492785463bd1cdb376a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_22a1ecc265c7419b99b89dc635351163", + "max": 4025, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_405da6f282174a00a302d69a17634ca8", + "value": 4025 + } + }, + "75686e22371d4d3babec811f49d280f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, 
+ "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76252be98eed408a8f08fab0edca8d86": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5c634542ed3940a1a8ac3f73b634e88d", + "IPY_MODEL_b0721a5abbc044a59a9d119c65523ec3", + "IPY_MODEL_822cf5f25bbb4845a8df5b09df0c88f5" ], - "source": [ - "trainer.train()\n", - "metrics = trainer.evaluate()" - ] + "layout": "IPY_MODEL_d24e98b7ba64420588aeee6c0a7d787a" + } }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9117647058823529\n" - ] - } + "77131a41265342a28468664ad067d4c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"79e43db8a877400198f73dbab0105ade": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_02f439d46b384718b0c7ab5fd8aa2d03", + "placeholder": "​", + "style": "IPY_MODEL_742743267970469b8b358f051aeb3d7f", + "value": " 1.88k/1.88k [00:00<00:00, 114kB/s]" + } + }, + "7b3978443935464b9a1352ef3bd2d415": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d508b60c1f44aecb9e3f9711ecc0ce8": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "820351c564124d7d871dc17681b747e9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7b3978443935464b9a1352ef3bd2d415", + "placeholder": "​", + "style": "IPY_MODEL_ec25ea55d62e4c0a9443d91d015bdbd6", + "value": " 605M/605M [00:04<00:00, 153MB/s]" + } + }, + "822cf5f25bbb4845a8df5b09df0c88f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e831d4f1ad4d4f5f97bc9569a5294ca9", + "placeholder": "​", + "style": "IPY_MODEL_71f5c4292d4f4564bb0bf5af9368163f", + "value": " 1/1 [00:57<00:00, 57.78s/it]" + } + }, + "8294163893bc40c2bef26032babb3938": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "83569f1993e44a439c4c99d5d58251f2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8364e4e977ad41fab9688c5a7443f276": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_11dd9c8f6fa744cfbf25ce25f3c8291b", + "IPY_MODEL_4477e281c40c41f9a5c076e30640460e", + "IPY_MODEL_b800cade432f484ab1619620ba8a93b8" ], - "source": [ - "fewshot_relabelled_text_accuracy = metrics[\"accuracy\"]\n", - "pp.pprint(fewshot_relabelled_text_accuracy)" - ] + "layout": "IPY_MODEL_b57479b0aab44fae95c28b59905e053c" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "TLEiVC_KNYnP" - }, - "source": [ - "## Summary\n", - "\n", - "In this tutorial, we have learned to bulk-label a multi-modal dataset using a modified version of Argilla. We compared a few-shot classifier trained on the bulk-labelled dataset with zero-shot classifiers of image and text. The results show that the few-shot classifier is able to achieve a higher accuracy than the zero-shot classifiers. 
Furthermore, the SetFit model is significantly faster than the zero-shot classifiers.\n", - "\n", - "This approach can be applied to classification tasks with limited data and can be used to train a classifier with minimal human effort.\n", - "\n" - ] + "84d1a26c298b42cd820bcd2e4302a443": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAJ6CAYAAAD6q0KOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBdUlEQVR4nO3de1yW9f3H8TcoooaCZuCJRFHzhGZgDs8HPKdZblmZkqlzLQ9J9ZuWQlppueXMrEzNpi1TOznLpiWGR0pTYZZpmiCGouIBFAyUm98frXtjYJOWfPS+Xs/H435Mvvd1y9vHvuHb7/W9rsursLCwUAAAAEa8rQMAAABno4wAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAVHnrAJfD5XLpyJEjqlKliry8vKzjAACAy1BYWKizZ8+qdu3a8va+9PrHNVFGjhw5ouDgYOsYAADgZzh8+LDq1q17yfeviTJSpUoVST/8YapWrWqcBgAAXI7s7GwFBwe7/x6/lGuijPx4aqZq1aqUEQAArjH/bYsFG1gBAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU+WtAwAAnCVk4mrrCB4h9dl+1hF+MayMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU+WtA3iSkImrrSN4jNRn+1lH8AjMyV8OcxK4clgZAQAApigjAADAFGUEAACYoowAAABTP6uMvPTSSwoJCVHFihXVtm1bbdu27SePnz17
tm666SZVqlRJwcHBmjBhgr7//vufFRgAAHiWUpeR5cuXKyYmRnFxcdq5c6datWqlXr166fjx4yUev3TpUk2cOFFxcXH6+uuv9dprr2n58uV6/PHH/+fwAADg2lfqMjJr1iyNGjVKw4cPV7NmzTRv3jxVrlxZixYtKvH4rVu3qn379rr33nsVEhKinj176p577vmvqykAAMAZSlVG8vPztWPHDkVFRf3rN/D2VlRUlBITE0v8TLt27bRjxw53+Th48KA++ugj9e3b95LfJy8vT9nZ2UVeAADAM5XqpmeZmZkqKChQUFBQkfGgoCDt3bu3xM/ce++9yszMVIcOHVRYWKiLFy/qd7/73U+eppkxY4amTp1ammgAAOAadcWvpklISND06dP18ssva+fOnXrvvfe0evVqPfXUU5f8zKRJk5SVleV+HT58+ErHBAAARkq1MlKjRg2VK1dOx44dKzJ+7Ngx1axZs8TPTJkyRUOHDtXIkSMlSWFhYcrJydFvf/tbPfHEE/L2Lt6HfH195evrW5poAADgGlWqlZEKFSooPDxc8fHx7jGXy6X4+HhFRkaW+Jnc3NxihaNcuXKSpMLCwtLmBQAAHqbUD8qLiYlRdHS0IiIidOutt2r27NnKycnR8OHDJUnDhg1TnTp1NGPGDElS//79NWvWLLVu3Vpt27bVgQMHNGXKFPXv399dSgAAgHOVuowMHjxYJ06cUGxsrDIyMnTzzTdrzZo17k2taWlpRVZCJk+eLC8vL02ePFnp6em64YYb1L9/fz3zzDO/3J8CAABcs0pdRiRpzJgxGjNmTInvJSQkFP0G5csrLi5OcXFxP+dbAQAAD8ezaQAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMEUZAQAApigjAADAFGUEAACYoowAAABTlBEAAGCKMgIAAExRRgAAgCnKCAAAMPWzyshLL72kkJAQVaxYUW3bttW2bdt+8vgzZ87ooYceUq1ateTr66vGjRvro48++lmBAQCAZylf2g8sX75cMTExmjdvntq2bavZs2erV69e2rdvnwIDA4sdn5+frx49eigwMFDvvPOO6tSpo0OHDikgIOCXyA8AAK5xpS4js2bN0qhRozR8+HBJ0rx587R69WotWrRIEydOLHb8okWLdOrUKW3dulU+Pj6SpJCQkP8tNQAA8BilOk2Tn5+vHTt2KCoq6l+/gbe3oqKilJiYWOJnVq1apcjISD300EMKCgpSixYtNH36dBUUFPxvyQEAgEco1cpIZmamCgoKFBQUVGQ8KChIe/fuLfEzBw8e1Pr16zVkyBB99NFHOnDggH7/+9/rwoULiouLK/EzeXl5ysvLc3+dnZ1dmpgAAOAacsWvpnG5XAoMDNT8+fMVHh6uwYMH64knntC8efMu+ZkZM2bI39/f/QoODr7SMQEAgJFSlZEaNWqoXLlyOnbsWJHxY8eOqWbNmiV+platWmrcuLHKlSvnHmvatKkyMjKUn59f4mcmTZqkrKws9+vw4cOliQkAAK4hpSojFSpUUHh4uOLj491jLpdL8fHxioyMLPEz7du314EDB+Ryudxj33zzjWrVqqUKFSqU+BlfX19V
rVq1yAsAAHimUp+miYmJ0YIFC7R48WJ9/fXXevDBB5WTk+O+umbYsGGaNGmS+/gHH3xQp06d0vjx4/XNN99o9erVmj59uh566KFf7k8BAACuWaW+tHfw4ME6ceKEYmNjlZGRoZtvvllr1qxxb2pNS0uTt/e/Ok5wcLDWrl2rCRMmqGXLlqpTp47Gjx+vP/zhD7/cnwIAAFyzSl1GJGnMmDEaM2ZMie8lJCQUG4uMjNRnn332c74VAADwcDybBgAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABg6meVkZdeekkhISGqWLGi2rZtq23btl3W55YtWyYvLy8NHDjw53xbAADggUpdRpYvX66YmBjFxcVp586datWqlXr16qXjx4//5OdSU1P16KOPqmPHjj87LAAA8DylLiOzZs3SqFGjNHz4cDVr1kzz5s1T5cqVtWjRokt+pqCgQEOGDNHUqVPVoEGD/ykwAADwLKUqI/n5+dqxY4eioqL+9Rt4eysqKkqJiYmX/Ny0adMUGBioESNGXNb3ycvLU3Z2dpEXAADwTKUqI5mZmSooKFBQUFCR8aCgIGVkZJT4mc2bN+u1117TggULLvv7zJgxQ/7+/u5XcHBwaWICAIBryBW9mubs2bMaOnSoFixYoBo1alz25yZNmqSsrCz36/Dhw1cwJQAAsFS+NAfXqFFD5cqV07Fjx4qMHzt2TDVr1ix2/LfffqvU1FT179/fPeZyuX74xuXLa9++fQoNDS32OV9fX/n6+pYmGgAAuEaVamWkQoUKCg8PV3x8vHvM5XIpPj5ekZGRxY5v0qSJdu/eraSkJPdrwIAB6tq1q5KSkjj9AgAASrcyIkkxMTGKjo5WRESEbr31Vs2ePVs5OTkaPny4JGnYsGGqU6eOZsyYoYoVK6pFixZFPh8QECBJxcYBAIAzlbqMDB48WCdOnFBsbKwyMjJ08803a82aNe5NrWlpafL25sauAADg8pS6jEjSmDFjNGbMmBLfS0hI+MnP/uUvf/k53xIAAHgoljAAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADD1s8rISy+9pJCQ
EFWsWFFt27bVtm3bLnnsggUL1LFjR1WrVk3VqlVTVFTUTx4PAACcpdRlZPny5YqJiVFcXJx27typVq1aqVevXjp+/HiJxyckJOiee+7Rp59+qsTERAUHB6tnz55KT0//n8MDAIBrX6nLyKxZszRq1CgNHz5czZo107x581S5cmUtWrSoxOPffPNN/f73v9fNN9+sJk2aaOHChXK5XIqPj/+fwwMAgGtfqcpIfn6+duzYoaioqH/9Bt7eioqKUmJi4mX9Hrm5ubpw4YKqV69+yWPy8vKUnZ1d5AUAADxTqcpIZmamCgoKFBQUVGQ8KChIGRkZl/V7/OEPf1Dt2rWLFJr/NGPGDPn7+7tfwcHBpYkJAACuIWV6Nc2zzz6rZcuW6f3331fFihUvedykSZOUlZXlfh0+fLgMUwIAgLJUvjQH16hRQ+XKldOxY8eKjB87dkw1a9b8yc/+6U9/0rPPPqt169apZcuWP3msr6+vfH19SxMNAABco0q1MlKhQgWFh4cX2Xz642bUyMjIS35u5syZeuqpp7RmzRpFRET8/LQAAMDjlGplRJJiYmIUHR2tiIgI3XrrrZo9e7ZycnI0fPhwSdKwYcNUp04dzZgxQ5L03HPPKTY2VkuXLlVISIh7b4mfn5/8/Px+wT8KAAC4FpW6jAwePFgnTpxQbGysMjIydPPNN2vNmjXuTa1paWny9v7Xgssrr7yi/Px8/frXvy7y+8TFxenJJ5/839IDAIBrXqnLiCSNGTNGY8aMKfG9hISEIl+npqb+nG8BAAAcgmfTAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABgijICAABMUUYAAIApyggAADBFGQEAAKYoIwAAwBRlBAAAmKKMAAAAU5QRAABg6meVkZdeekkhISGqWLGi2rZtq23btv3k8W+//baaNGmiihUrKiwsTB999NHPCgsAADxPqcvI8uXLFRMTo7i4OO3cuVOtWrVSr169dPz48RKP37p1q+655x6NGDFCu3bt0sCBAzVw4EB9+eWX/3N4AABw7St1GZk1a5ZGjRql4cOHq1mzZpo3b54qV66sRYsWlXj8Cy+8oN69e+uxxx5T06ZN9dRTT+mWW27R3Llz/+fwAADg2le+NAfn5+drx44dmjRpknvM29tbUVFRSkxMLPEziYmJiomJKTLWq1cvrVy58pLfJy8vT3l5ee6vs7KyJEnZ2dmliVvmXHm51hE8xtX+//W1gjn5y2FO/nKYl7+Ma2FO/pixsLDwJ48rVRnJzMxUQUGBgoKCiowHBQVp7969JX4mIyOjxOMzMjIu+X1mzJihqVOnFhsPDg4uTVxcw/xnWycAimJO4mpzLc3Js2fPyt/f/5Lvl6qMlJVJkyYVWU1xuVw6deqUrr/+enl5eRkmu7ZlZ2crODhYhw8fVtWqVa3jAJKYl7j6MCd/OYWFhTp79qxq1679k8eVqozUqFFD5cqV07Fjx4qMHzt2TDVr1izxMzVr1izV8ZLk6+srX1/fImMBAQGliYqfULVqVf4Dw1WHeYmrDXPyl/FTKyI/KtUG1goVKig8PFzx8fHuMZfLpfj4eEVGRpb4mcjIyCLHS9Inn3xy
yeMBAICzlPo0TUxMjKKjoxUREaFbb71Vs2fPVk5OjoYPHy5JGjZsmOrUqaMZM2ZIksaPH6/OnTvr+eefV79+/bRs2TJ98cUXmj9//i/7JwEAANekUpeRwYMH68SJE4qNjVVGRoZuvvlmrVmzxr1JNS0tTd7e/1pwadeunZYuXarJkyfr8ccfV6NGjbRy5Uq1aNHil/tT4LL4+voqLi6u2CkwwBLzElcb5mTZ8yr8b9fbAAAAXEE8mwYAAJiijAAAAFOUEQAAYIoyAgAATFFGAACAKcqIh3v99deVm8tDqXD1iIuL06FDh6xjAEUwL21RRjzcxIkTVbNmTY0YMUJbt261jgPob3/7m0JDQ9W9e3ctXbq0yBO6ASvMS1uUEQ+Xnp6uxYsXKzMzU126dFGTJk303HPP/eRTk4ErKSkpSdu3b1fz5s01fvx41axZUw8++KC2b99uHQ0Oxry0xU3PHOTYsWP661//qsWLF2vv3r3q3bu3RowYof79+xe5ay5QVi5cuKAPPvhAr7/+utauXasmTZpoxIgRuv/++y/r4VrAlcC8LHv8DeQgQUFB6tChgyIjI+Xt7a3du3crOjpaoaGhSkhIsI4HByosLNSFCxeUn5+vwsJCVatWTXPnzlVwcLCWL19uHQ8Oxbwse5QRBzh27Jj+9Kc/qXnz5urSpYuys7P14YcfKiUlRenp6brrrrsUHR1tHRMOsmPHDo0ZM0a1atXShAkT1Lp1a3399dfasGGD9u/fr2eeeUbjxo2zjgmHYV7a4TSNh+vfv7/Wrl2rxo0ba+TIkRo2bJiqV69e5Jjjx4+rZs2acrlcRinhJGFhYdq7d6969uypUaNGqX///ipXrlyRYzIzMxUYGMicRJlhXtoq9VN7cW0JDAzUhg0bFBkZecljbrjhBqWkpJRhKjjZXXfdpQceeEB16tS55DE1atTgBz7KFPPSFisjAADAFHtGPNy4ceM0Z86cYuNz587Vww8/XPaB4HiDBg3Sc889V2x85syZ+s1vfmOQCGBeWqOMeLh3331X7du3Lzberl07vfPOOwaJ4HQbN25U3759i4336dNHGzduNEgEMC+tUUY83MmTJ0u8Lr5q1arKzMw0SASnO3funCpUqFBs3MfHR9nZ2QaJAOalNcqIh2vYsKHWrFlTbPzvf/+7GjRoYJAIThcWFlbivRqWLVumZs2aGSQCmJfWuJrGw8XExGjMmDE6ceKEunXrJkmKj4/X888/r9mzZ9uGgyNNmTJFd955p7799tsic/Ktt97S22+/bZwOTsW8tMXVNA7wyiuv6JlnntGRI0ckSSEhIXryySc1bNgw42RwqtWrV2v69OlKSkpSpUqV1LJlS8XFxalz587W0eBgzEs7lBEHOXHihCpVqiQ/Pz/rKAAAuFFGAACAKfaMOMA777yjFStWKC0tTfn5+UXe27lzp1EqOFVBQYH+/Oc/X3JOnjp1yigZnIx5aYuraTzcnDlzNHz4cAUFBWnXrl269dZbdf311+vgwYPq06ePdTw40NSpUzVr1iwNHjxYWVlZiomJ0Z133ilvb289+eST1vHgUMxLW5ym8XBNmjRRXFyc7rnnHlWpUkXJyclq0KCBYmNjderUKc2dO9c6IhwmNDRUc+bMUb9+/VSlShUlJSW5xz777DMtXbrUOiIciHlpi5URD5eWlqZ27dpJkipVqqSzZ89KkoYOHaq33nrLMhocKiMjQ2FhYZIkPz8/ZWVlSZJuu+02rV692jIaHIx5aYsy4uFq1qzpPtd544036rPPPpMkpaSkiEUxWKhbt66OHj0q6Yd/jX788ceSpO3bt8vX19cyGhyMeWmLMuLhunXrplWrVkmShg8frgkTJqhHjx4aPHiw7rjjDuN0cKI77rhD8fHxkqSxY8dqypQpatSokYYNG6YHHnjAOB2cinlpiz0jHs7lcsnlcql8+R8unFq2bJm2bt2qRo0aafTo0SU+iwEoS5999pl7Tvbv3986DiCJ
eVnWKCMe7OLFi5o+fboeeOAB1a1b1zoOoAsXLmj06NGaMmWK6tevbx0HkMS8vBpQRjycn5+fvvzyS4WEhFhHASRJ/v7+SkpK4oc+rirMS1vsGfFw3bt314YNG6xjAG4DBw7UypUrrWMARTAvbXEHVg/Xp08fTZw4Ubt371Z4eLiuu+66Iu8PGDDAKBmcqlGjRpo2bZq2bNlS4pwcN26cUTI4GfPSFqdpPJy396UXv7y8vFRQUFCGaQD95DK4l5eXDh48WIZpgB8wL21RRgAAgCn2jAAAAFPsGfFw06ZN+8n3Y2NjyygJ8IP/dgOpRYsWlVES4F+Yl7YoIx7u/fffL/L1hQsXlJKSovLlyys0NJQygjJ3+vTpIl9fuHBBX375pc6cOaNu3boZpYLTMS9tUUY83K5du4qNZWdn6/777+d28DDxnwVZ+uFOwQ8++KBCQ0MNEgHMS2tsYHWo3bt3q3///kpNTbWOAkiS9u3bpy5durgfVgZcDZiXZYMNrA6VlZXlfkQ2cDX49ttvdfHiResYQBHMy7LBaRoPN2fOnCJfFxYW6ujRo3rjjTfUp08fo1RwspiYmCJf/zgnV69erejoaKNUcDrmpS1O03i4/7yRj7e3t2644QZ169ZNkyZNUpUqVYySwam6du1a5Ot/n5MPPPCA+wnTQFliXtqijAAAAFPsGfFwWVlZOnXqVLHxU6dOKTs72yARnC4lJUX79+8vNr5//342VMMM89IWZcTD3X333Vq2bFmx8RUrVujuu+82SASnu//++7V169Zi459//rnuv//+sg8EiHlpjdM0Hq569erasmWLmjZtWmR87969at++vU6ePGmUDE5VtWpV7dy5Uw0bNiwyfuDAAUVEROjMmTM2weBozEtbrIx4uLy8vBIvS7tw4YLOnz9vkAhO5+XlpbNnzxYbz8rK4inSMMO8tEUZ8XC33nqr5s+fX2x83rx5Cg8PN0gEp+vUqZNmzJhR5Ad8QUGBZsyYoQ4dOhgmg5MxL21xmsbDbdmyRVFRUWrTpo26d+8uSYqPj9f27dv18ccfq2PHjsYJ4TR79uxRp06dFBAQ4J5/mzZtUnZ2ttavX68WLVoYJ4QTMS9tUUYcICkpSX/84x+VlJSkSpUqqWXLlpo0aZIaNWpkHQ0OdeTIEc2dO1fJycnuOTlmzBhVr17dOhocjHlphzICAABMsWfEw3300Udau3ZtsfG1a9fq73//u0EiON3rr7+ut99+u9j422+/rcWLFxskApiX1igjHm7ixIkl7gQvLCzUxIkTDRLB6WbMmKEaNWoUGw8MDNT06dMNEgHMS2uUEQ+3f/9+NWvWrNh4kyZNdODAAYNEcLq0tLRiz0ySpHr16iktLc0gEcC8tEYZ8XD+/v46ePBgsfEDBw7ouuuuM0gEpwsMDNQ//vGPYuPJycm6/vrrDRIBzEtrlBEPd/vtt+vhhx/Wt99+6x47cOCAHnnkEQ0YMMAwGZzqnnvu0bhx4/Tpp5+qoKBABQUFWr9+vcaPH88jCmCGeWmLq2k8XFZWlnr37q0vvvhCdevWlSR999136tixo9577z0FBATYBoTj5Ofna+jQoXr77bfdj2V3uVwaNmyY5s2bpwoVKhgnhBMxL21RRhygsLBQn3zySZFr5zt16mQdCw73zTffuOdkWFiY6tWrZx0JYF4aoYwAAABT5a0D4MrLycnRhg0blJaWpvz8/CLvjRs3zigVnOy7777TqlWrSpyTs2bNMkoFp2Ne2qGMeLhdu3apb9++ys3NVU5OjqpXr67MzExVrlxZgYGBlBGUufj4eA0YMEANGjTQ3r171aJFC6WmpqqwsFC33HKLdTw4FPPSFlfTeLgJEyaof//+On36tCpVqqTPPvtMhw4dUnh4uP70pz9Zx4MDTZo0SY8++qh2796tihUr6t1339Xhw4fVuXNn/eY3v7GOB4diXtpiz4iHCwgI0Oeff66bbrpJAQEBSkxMVNOmTfX5558rOjpa
e/futY4Ih6lSpYqSkpIUGhqqatWqafPmzWrevLmSk5N1++23KzU11ToiHIh5aYuVEQ/n4+Mjb+8f/m8ODAx030nQ399fhw8ftowGh7ruuuvc5+Nr1apV5B44mZmZVrHgcMxLW+wZ8XCtW7fW9u3b1ahRI3Xu3FmxsbHKzMzUG2+8oRYtWljHgwP96le/0ubNm9W0aVP17dtXjzzyiHbv3q333ntPv/rVr6zjwaGYl7Y4TePhvvjiC509e1Zdu3bV8ePHNWzYMG3dulWNGjXSokWL1KpVK+uIcJiDBw/q3LlzatmypXJycvTII4+45+SsWbO4rwNMMC9tUUYgSdqyZYsiIiLk6+trHQWQJL311lsaMGAAz1DCVYV5eWVQRiBJqlq1qpKSktSgQQPrKIAk5iSuTszLK4MNrJD0wy3jgasJcxJXI+bllUEZAQAApigjAADAFGUEAACYooxAkuTl5WUdAQDgUJQRSGJTFq4+9erVk4+Pj3UMoAjm5ZVBGXGAixcvat26dXr11Vd19uxZSdKRI0d07tw59zFnz57lUjWUiQYNGujkyZPFxs+cOVNkDn755ZcKDg4uy2hwMOalLW4H7+EOHTqk3r17Ky0tTXl5eerRo4eqVKmi5557Tnl5eZo3b551RDhMamqqCgoKio3n5eUpPT3dIBHAvLRGGfFw48ePV0REhJKTk3X99de7x++44w6NGjXKMBmcZtWqVe5fr127Vv7+/u6vCwoKFB8fr5CQEINkcDLm5dWBO7B6uOuvv15bt27VTTfdpCpVqig5OVkNGjRQamqqmjVrptzcXOuIcIgfnx7t5eVVbI+Sj4+PQkJC9Pzzz+u2226ziAeHYl5eHVgZ8XAul6vEpcfvvvtOVapUMUgEp3K5XJKk+vXra/v27apRo4ZxIoB5ebVgA6uH69mzp2bPnu3+2svLS+fOnVNcXJz69u1rFwyOdfDgwUv+wGelDlaYl7YoIx7u+eef15YtW9SsWTN9//33uvfeexUSEqL09HQ999xz1vHgQFFRUSVuCPz888918803l30gQMxLa5QRD1e3bl0lJyfr8ccf14QJE9S6dWs9++yz2rVrlwIDA63jwYEqVqyoli1bavny5ZJ+WCZ/8skn1bFjR1brYIZ5aYsNrADK3EsvvaT/+7//0+23367U1FQdOnRIr7/+unr27GkdDQ7GvLRDGfFw/37Z2r/z8vJSxYoV1bBhQ9WvX7+MUwHSpEmT9Nxzz6l8+fJKSEhQu3btrCMBzEsjlBEP5+3tXeIlaz+OeXl5qUOHDlq5cqWqVatmlBJOcvr0aY0cOVLx8fH64x//qA0bNmjlypWaOXOmfv/731vHg0MxL22xZ8TDffLJJ2rTpo0++eQTZWVlKSsrS5988onatm2rDz/8UBs3btTJkyf16KOPWkeFQ7Ro0ULHjh3Trl27NGrUKP31r3/Va6+9pilTpqhfv37W8eBQzEtjhfBozZs3L9yyZUux8c2bNxc2a9assLCwsPCTTz4pDA4OLutocKhp06YVFhQUFBs/fPhwYVRUlEEigHlpjdM0Hq5SpUravn27WrRoUWR89+7duvXWW3X+/HkdOnRITZs25Vp6lLnvv/9eFStWtI4BFMG8LHucpvFw4eHheuyxx3TixAn32IkTJ/R///d/atOmjSRp//79PIUSZcblcumpp55SnTp15Ofnp4MHD0qSpkyZotdee804HZyKeWmLMuLhXnvtNaWkpKhu3bpq2LChGjZsqLp16yo1NVULFy6UJJ07d06TJ082TgqnePrpp/WXv/xFM2fOVIUKFdzjLVq0cM9JoKwxL21xmsYBXC6XPv74Y33zzTeSpJtuukk9evRwPyAKKEsNGzbUq6++qu7duxd5eOPevXsVGRmp06dPW0eEAzEvbfGgPAfw9vZW79691bt3b+sogNLT09WwYcNi4y6XSxcuXDBIBDAvrVFGHCAnJ0cbNmxQWlqa8vPzi7w3btw4o1RwqmbN
mmnTpk2qV69ekfF33nlHrVu3NkoFp2Ne2qKMeLhdu3apb9++ys3NVU5OjqpXr67MzExVrlxZgYGBlBGUudjYWEVHRys9PV0ul0vvvfee9u3bpyVLlujDDz+0jgeHYl7aYs+Ih+vSpYsaN26sefPmyd/fX8nJyfLx8dF9992n8ePH684777SOCAfatGmTpk2bpuTkZJ07d0633HKLYmNjeQYITDEv7VBGPFxAQIA+//xz3XTTTQoICFBiYqKaNm2qzz//XNHR0dq7d691RACAw3E5hYfz8fFxXzUTGBiotLQ0SZK/v78OHz5sGQ0O1aBBA508ebLY+JkzZ9SgQQODRADz0hp7Rjxc69attX37djVq1EidO3dWbGysMjMz9cYbbxS7KytQFlJTU1VQUFBsPC8vT+np6QaJAOalNcqIh5s+fbrOnj0rSXrmmWc0bNgwPfjgg2rUqJEWLVpknA5OsmrVKvev165dK39/f/fXBQUFio+PV0hIiEEyOBnz8urAnhEAZeLH04VeXl76zx87Pj4+CgkJ0fPPP6/bbrvNIh4cinl5daCMAChT9evX1/bt21WjRg3rKIAb89IWG1g93MmTJ/XQQw+pWbNmqlGjhqpXr17kBZS1lJSUy/qBHxYWxiZrlBnmpS32jHi4oUOH6sCBAxoxYoSCgoLk5eVlHQm4LKmpqdyGG1cd5uWVQRnxcJs2bdLmzZvVqlUr6ygAAJSI0zQerkmTJjp//rx1DAAALoky4uFefvllPfHEE9qwYYNOnjyp7OzsIi8AAKxxmsbDBQQEKDs7W926dSsyXlhYKC8vrxJv8gMAQFmijHi4IUOGyMfHR0uXLmUDKwDgqkQZ8XBffvmldu3apZtuusk6CiBJWrJkiQYPHixfX98i4/n5+Vq2bJmGDRsmSXr11VcVFBRkERG4JObllcFNzzxcp06dFBsbq6ioKOsogCSpXLlyOnr0qAIDA4uMnzx5UoGBgZw6RJmZM2fOZR87bty4K5gElBEP9/bbb+vJJ5/UY489prCwMPn4+BR5v2XLlkbJ4FTe3t46duyYbrjhhiLjycnJ6tq1q06dOmWUDE5Tv379Il+fOHFCubm5CggIkPTDE3srV66swMBAHTx40CChc1BGPNyPz134dz8+g4ENrChLrVu3lpeXl5KTk9W8eXOVL/+vs8QFBQVKSUlR7969tWLFCsOUcKqlS5fq5Zdf1muvveY+rb1v3z6NGjVKo0eP1pAhQ4wTejbKiIc7dOjQT75fr169MkoCp5s6dar7fx955BH5+fm536tQoYJCQkI0aNAgVahQwSoiHCw0NFTvvPOOWrduXWR8x44d+vWvf62UlBSjZM7ABlYPR9nA1SIuLk6SFBISosGDB6tixYrGiYB/OXr0qC5evFhsvKCgQMeOHTNI5CysjHigVatWqU+fPvLx8dGqVat+8tgBAwaUUSqgqB07dujrr7+WJDVv3rzYv0iBstS/f3+lp6dr4cKFuuWWWyT9MEd/+9vfqk6dOv/1Zyn+N5QRD+Tt7a2MjAwFBgaWuGfkR+wZgYXjx4/r7rvvVkJCQpGNgl27dtWyZcuKbWwFysKJEycUHR2tNWvWuDf6X7x4Ub169dJf/vKXYld/4ZdFGQFQpgYPHqyDBw9qyZIlatq0qSRpz549io6OVsOGDfXWW28ZJ4STffPNN9q7d6+kH57t1bhxY+NEzkAZgSQpLCxMH330kYKDg62jwMP5+/tr3bp1atOmTZHxbdu2qWfPnjpz5oxNMEA/3HwvJSVFoaGhRa74wpXFg/IgSUpNTdWFCxesY8ABXC5XsfvdSJKPj49cLpdBIkDKzc3ViBEjVLlyZTVv3lxpaWmSpLFjx+rZZ581Tuf5KCMAylS3bt00fvx4HTlyxD2Wnp6uCRMmqHv37obJ4GSTJk1ScnKyEhISilzpFRUVpeXLlxsmcwbKCIAyNXfuXGVnZyskJEShoaEKDQ1V/fr1lZ2drRdffNE6Hhxq
5cqVmjt3rjp06FDkgaLNmzfXt99+a5jMGTghBqBMBQcHa+fOnVq3bp17o2DTpk15fhJMnThxosQrZnJycnjaeRmgjAAoc15eXurRo4d69OhhHQWQJEVERGj16tUaO3asJLkLyMKFCxUZGWkZzREoIwDKXHx8vOLj43X8+PFim1YXLVpklApONn36dPXp00d79uzRxYsX9cILL2jPnj3aunWrNmzYYB3P47FnxMMtWbJEeXl5xcbz8/O1ZMkS99evvvqqgoKCyjIaHGrq1Knq2bOn4uPjlZmZqdOnTxd5ARY6dOigpKQkXbx4UWFhYfr4448VGBioxMREhYeHW8fzeNxnxMOVK1dOR48eLXYu9OTJkwoMDOQOrChztWrV0syZMzV06FDrKACuEpym8XCFhYUlbr767rvv5O/vb5AITpefn6927dpZxwCUnZ192cdWrVr1CiYBZcRDtW7dWl5eXvLy8lL37t2L3EmwoKBAKSkp6t27t2FCONXIkSO1dOlSTZkyxToKHC4gIOC/Xinz4z/oWEW+sigjHmrgwIGSpKSkJPXq1Ut+fn7u9ypUqKCQkBANGjTIKB2cJiYmxv1rl8ul+fPna926dWrZsmWxu7HOmjWrrOPBoT799FPrCPgn9ox4uMWLF2vw4MFF7igIlLWuXbte1nFeXl5av379FU4D4GpDGXGIHTt26Ouvv5b0wx0FW7dubZwIAGz94x//uOxjW7ZseQWTgDLi4Y4fP667775bCQkJCggIkCSdOXNGXbt21bJly3TDDTfYBoTjZWdna/369WrSpImaNGliHQcO4u3tLS8vL/23vwbZM3LlsWfEw40dO1Znz57VV199paZNm0qS9uzZo+joaI0bN05vvfWWcUI4zV133aVOnTppzJgxOn/+vCIiIpSamqrCwkItW7aMvUwoMykpKdYR8E+sjHg4f39/rVu3Tm3atCkyvm3bNvXs2VNnzpyxCQbHqlmzptauXatWrVpp6dKliouLU3JyshYvXqz58+dr165d1hEBlDHuwOrhXC5XsasVJMnHx6fYbbiBspCVlaXq1atLktasWaNBgwapcuXK6tevn/bv32+cDk72xhtvqH379qpdu7YOHTokSZo9e7b+9re/GSfzfJQRD9etWzeNHz9eR44ccY+lp6drwoQJ6t69u2EyOFVwcLASExOVk5OjNWvWqGfPnpKk06dPc9UXzLzyyiuKiYlR3759debMGfcekYCAAM2ePds2nANQRjzc3LlzlZ2drZCQEIWGhio0NFT169dXdna2XnzxRet4cKCHH35YQ4YMUd26dVW7dm116dJFkrRx40aFhYXZhoNjvfjii1qwYIGeeOIJlStXzj0eERGh3bt3GyZzBvaMOEBhYaHWrVunvXv3SpKaNm2qqKgo41Rwsh07digtLU09evRw35Bv9erVCggIUPv27Y3TwYkqVaqkvXv3ql69eqpSpYqSk5PVoEED7d+/Xy1bttT58+etI3o0rqZxAC8vL/Xo0UM9evSwjgJIksLDw4s9CbVfv35Fvq5ataqSkpLUoEGDsowGh6pfv76SkpJUr169IuNr1qxxX4mIK4cy4gDx8fGKj4/X8ePHi21aXbRokVEq4KexaIuyFBMTo4ceekjff/+9CgsLtW3bNr311luaMWOGFi5caB3P41FGPNzUqVM1bdo0RUREqFatWv/1oVAA4EQjR45UpUqVNHnyZOXm5uree+9V7dq19cILL+juu++2jufx2DPi4WrVqqWZM2dq6NCh1lGAUvn38/ZAWcrNzdW5c+cUGBhoHcUxWBnxcPn5+WrXrp11DAC4Jhw/flz79u2T9MN+Ox6ZUTa4tNfDjRw5UkuXLrWOAZQapxRRls6ePauhQ4eqdu3a6ty5szp37qzatWvrvvvuU1ZWlnU8j8fKiAeKiYlx/9rlcmn+/Plat26dWrZsWexurLNmzSrreMBl4QwyytLIkSO1a9curV69WpGRkZKkxMREjR8/
XqNHj9ayZcuME3o29ox4oK5du17WcV5eXlq/fv0VTgNc2o8/fkpaBdm8ebPatGkjX1/fso4FB7ruuuu0du1adejQocj4pk2b1Lt3b+Xk5BglcwZWRjzQp59+ah0B+ElLlizRH//4R/ezaBo3bqzHHnusyEbr//xLAbiSrr/+evn7+xcb9/f3V7Vq1QwSOQt7RhwmOztbK1eudN+NFShrs2bN0oMPPqi+fftqxYoVWrFihXr37q3f/e53+vOf/2wdDw41efJkxcTEKCMjwz2WkZGhxx57TFOmTDFM5gycpvFwd911lzp16qQxY8bo/PnzatWqlVJTU1VYWKhly5Zp0KBB1hHhMPXr19fUqVM1bNiwIuOLFy/Wk08+qZSUFKNkcJrWrVsXOUW4f/9+5eXl6cYbb5QkpaWlydfXV40aNdLOnTutYjoCp2k83MaNG/XEE09Ikt5//30VFhbqzJkzWrx4sZ5++mnKCMrc0aNHS7zcvF27djp69KhBIjjVwIEDrSPgnygjHi4rK0vVq1eX9MMzFgYNGqTKlSurX79+euyxx4zTwYkaNmyoFStW6PHHHy8yvnz5cjVq1MgoFZwoLi7OOgL+iTLi4YKDg5WYmKjq1atrzZo17svTTp8+rYoVKxqngxNNnTpVgwcP1saNG91P6N2yZYvi4+O1YsUK43QALFBGPNzDDz+sIUOGyM/PT/Xq1VOXLl0k/XD6JiwszDYcHGnQoEHatm2bZs2apZUrV0qSmjZtqm3btql169a24eBYBQUF+vOf/6wVK1YoLS1N+fn5Rd4/deqUUTJnYAOrA+zYsUNpaWnq0aOH/Pz8JEmrV69WQECA+1+mQFm4cOGCRo8erSlTpqh+/frWcQC32NhYLVy4UI888ogmT56sJ554QqmpqVq5cqViY2M1btw464gejTICSVLVqlWVlJTEQ8lwxfn7+yspKYkygqtKaGio5syZo379+qlKlSpKSkpyj3322Wc8VuMK4z4jkMStt1F2Bg4c6D49A1wtMjIy3Keu/fz83M+jue2227R69WrLaI7AnhEAZapRo0aaNm2atmzZovDwcF133XVF3mc5HBbq1q2ro0eP6sYbb1RoaKg+/vhj3XLLLdq+fTuPJCgDnKaBJKlKlSpKTk7mNA2uuJ86PePl5aWDBw+WYRrgBxMnTlTVqlX1+OOPa/ny5brvvvsUEhKitLQ0TZgwQc8++6x1RI9GGYEkyggA/LvExEQlJiaqUaNG6t+/v3Ucj0cZgSQ2sKLs5efnKyUlRaGhoSpfnjPGgJPxEwCS2MCKspObm6uxY8dq8eLFkqRvvvlGDRo00NixY1WnTh1NnDjROCGcYtWqVZd97IABA65gElBGHOTHwvHvD4b60d///nfVqVOnrCPBgSZNmqTk5GQlJCSod+/e7vGoqCg9+eSTlBGUmct9No2Xl5cKCgqubBiH49JeB1iyZInCwsJUqVIlVapUSS1bttQbb7xR5JgOHTqwYxxlYuXKlZo7d646dOhQpBg3b95c3377rWEyOI3L5bqsF0XkymNlxMPNmjVLU6ZM0ZgxY9x3W928ebN+97vfKTMzUxMmTDBOCKc5ceKEAgMDi43n5OSUuGoHlLXvv/+eZ3eVMVZGPNyLL76oV155Rc8995wGDBigAQMGaObMmXr55Zc1Z84c63hwoIiIiCI3kfqxgCxcuFCRkZFWseBwBQUFeuqpp1SnTh35+fm5LzGfMmWKXnvtNeN0no+VEQ939OhRtWvXrth4u3btdPToUYNEcLrp06erT58+2rNnjy5evKgXXnhBe/bs0datW7VhwwbreHCoZ555RosXL9bMmTM1atQo93iLFi00e/ZsjRgxwjCd52NlxMM1bNiwxMeyL1++XI0aNTJIBKfr0KGDkpOTdfHiRYWFhenjjz9WYGCgEhMTFR4ebh0PDrVkyRLNnz9fQ4YMUbly5dzjrVq10t69ew2TOQMrIx5u6tSpGjx4sDZu3OjeM7Jl
yxbFx8eXWFKAK61bt27q3LmzFixYUGT89OnT6tatm9avX2+UDE6Wnp6uhg0bFht3uVy6cOGCQSJnYWXEww0aNEjbtm1TjRo1tHLlSq1cuVI1atTQtm3bdMcdd1jHgwMlJCRo7ty5GjhwoHJzc93j+fn5nKaBmWbNmmnTpk3Fxt955x21bt3aIJGzsDLiwS5cuKDRo0drypQp+utf/2odB3Bbt26dRo8erbZt2+qDDz5QSEiIdSQ4XGxsrKKjo5Weni6Xy6X33ntP+/bt05IlS/Thhx9ax/N4rIx4MB8fH7377rvWMYBiatWqpQ0bNigsLExt2rRRQkKCdSQ43O23364PPvhA69at03XXXafY2Fh9/fXX+uCDD9SjRw/reB6PMuLhBg4cqJUrV1rHANx+vJTX19dXS5cu1fjx49W7d2+9/PLLxsngVBcvXtS0adNUv359ffLJJzp+/Lhyc3O1efNm9ezZ0zqeI/CgPA/39NNP6/nnn1f37t0VHh6u6667rsj748aNM0oGp/L29lZGRkaRG5+9++67io6O1vnz57nbJUz4+fnpyy+/5JShEcqIh6tfv/4l3/Py8nLf2AcoK4cOHdKNN95Y7G6rX331lb744gtFR0cbJYOT3X777brzzjuZf0YoIwAAx5s3b56mTp2qIUOGlLiKzFN7ryzKiEPk5+crJSVFoaGhKl+ei6gA4N95e196CyVP7b3y2MDq4XJzczVixAhVrlxZzZs3V1pamiRp7NixevbZZ43TAcDVgaf22qKMeLhJkyYpOTlZCQkJRZ5CGRUVpeXLlxsmA4BrT1hYmA4fPmwdw+OwXu/hVq5cqeXLl+tXv/pVkQ2DzZs317fffmuYDACuPampqdwe/gpgZcTDnThxosgllD/KyckpdjUDAAAWKCMeLiIiQqtXr3Z//WMBWbhwoSIjI61iAQDgxmkaDzd9+nT16dNHe/bs0cWLF/XCCy9oz5492rp1Kw8lAwBcFVgZ8XAdOnRQcnKyLl68qLCwMH388ccKDAxUYmKiwsPDreMBAMDKiKfr1q2bOnfurAULFhQZP336tLp166b169cbJQMA4AesjHi4hIQEzZ07VwMHDlRubq57PD8/n9M0APBPS5YsUV5eXrHx/Px8LVmyxP31q6++qqCgoLKM5gjcgdXDeXt7a9euXRo9erRycnL0wQcfKCQkRMeOHVPt2rW5mQ8ASCpXrpyOHj1a7OrDkydPKjAwkJ+VVxgrIw5Qq1YtbdiwQWFhYWrTpo0SEhKsIwHAVaWwsLDE2x1899138vf3N0jkLOwZ8XA//sfl6+urpUuX6umnn1bv3r31hz/8wTgZANhr3bq1vLy85OXlpe7duxd5dldBQYFSUlLUu3dvw4TOQBnxcP95Fm7y5Mlq2rQpj8kGAEkDBw6UJCUlJalXr17y8/Nzv1ehQgWFhIRo0KBBRumcgz0jHu7QoUO68cYbiy0/fvXVV/riiy8oJQAgafHixRo8eHCRZ3ih7FBGAAD4px07dujrr7+W9MMzvFq3bm2cyBk4TQMAcLzjx4/r7rvvVkJCggICAiRJZ86cUdeuXbVs2TLdcMMNtgE9HFfTAAAcb+zYsTp79qy++uornTp1SqdOndKXX36p7OxsjRs3zjqex+M0DQDA8fz9/bVu3Tq1adOmyPi2bdvUs2dPnTlzxiaYQ7AyAgBwPJfLJR8fn2LjPj4+crlcBomchTICAHC8bt26afz48Tpy5Ih7LD09XRMmTFD37t0NkzkDp2kAAI53+PBhDRgwQF999ZWCg4PdYy1atNCqVatUt25d44SejTICAIB+uEnkunXrtHfvXklS06ZNFRUVZZzKGSgjAADAFPcZAQBAUnx8vOLj43X8+PFim1YXLVpklMoZKCMAAMebOnWqpk2bpoiICNWqVavEJ/jiyuE0DQDA8WrVqqWZM2dq6NCh1lEciUt7AQCOl5+fr3bt2lnHcCzKCADA8UaOHKmlS5dax3AsTtMA
ABwpJibG/WuXy6XFixerZcuWatmyZbG7sc6aNaus4zkKZQQA4Ehdu3a9rOO8vLy0fv36K5zG2SgjAADAFHtGAAD4D9nZ2Vq5cqX7bqy4sigjAADHu+uuuzR37lxJ0vnz5xUREaG77rpLYWFhevfdd43TeT7KCADA8TZu3KiOHTtKkt5//30VFhbqzJkzmjNnjp5++mnjdJ6PMgIAcLysrCxVr15dkrRmzRoNGjRIlStXVr9+/bR//37jdJ6PMgIAcLzg4GAlJiYqJydHa9asUc+ePSVJp0+fVsWKFY3TeT6eTQMAcLyHH35YQ4YMkZ+fn+rVq6cuXbpI+uH0TVhYmG04B+DSXgAAJH3xxRc6fPiwevToIT8/P0nS6tWrFRAQoPbt2xun82yUEQCA4x08eFANGjSwjuFYlBEAgON5e3urbt266ty5s7p06aLOnTurYcOG1rEcgzICAHC89PR0JSQkaMOGDdqwYYP279+v2rVrq3PnzuratatGjhxpHdGjUUYAAPgP+/fv1zPPPKM333xTLpdLBQUF1pE8GlfTAAAcLzc3V5s3b1ZCQoISEhK0a9cuNWnSRGPGjHFfWYMrh5URAIDjVahQQdWqVdOQIUPUpUsXdezYUdWqVbOO5RisjAAAHK9v377avHmzli1bpoyMDGVkZKhLly5q3LixdTRHYGUEAIB/+sc//uHexLpp0yaVL19eXbp00ZtvvmkdzaNRRgAA+KfCwkLt2rVLn376qT799FOtXbtWhYWFunjxonU0j8azaQAAjjdr1iwNGDBA119/vdq2bau33npLjRs31rvvvqsTJ05Yx/N4rIwAAByvTZs27huedezYUf7+/taRHIUyAgAATHGaBgAASZs2bdJ9992nyMhIpaenS5LeeOMNbd682TiZ56OMAAAc791331WvXr1UqVIl7dq1S3l5eZKkrKwsTZ8+3Tid56OMAAAc7+mnn9a8efO0YMEC+fj4uMfbt2+vnTt3GiZzBsoIAMDx9u3bp06dOhUb9/f315kzZ8o+kMNQRgAAjlezZk0dOHCg2PjmzZvVoEEDg0TOQhkBADjeqFGjNH78eH3++efy8vLSkSNH9Oabb+rRRx/Vgw8+aB3P4/FsGgCAI/3jH/9QixYt5O3trUmTJsnlcql79+7Kzc1Vp06d5Ovrq0cffVRjx461jurxuM8IAMCRypUrp6NHjyowMFANGjTQ9u3bVaVKFR04cEDnzp1Ts2bN5OfnZx3TEVgZAQA4UkBAgFJSUhQYGKjU1FS5XC5VqFBBzZo1s47mOJQRAIAjDRo0SJ07d1atWrXk5eWliIgIlStXrsRjDx48WMbpnIUyAgBwpPnz5+vOO+/UgQMHNG7cOI0aNUpVqlSxjuVI7BkBADje8OHDNWfOHMqIEcoIAAAwxX1GAACAKcoIAAAwRRkBAACmKCMAAMAUZQQAAJiijAAAAFOUEQAAYIoyAgAATP0/52uwjS/y/4sAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "8513e53c07c04d75999be1fec6d5b491": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "857c37c3638b4c7d8ffaaaa9fe5932f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "87a472fb7193452faea139ab6bf1c9be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_082dfef575c94327a5377c3f7278cf5a", + "placeholder": "​", + "style": "IPY_MODEL_84d1a26c298b42cd820bcd2e4302a443", + "value": "Downloading data: 100%" + } + }, + "88677a526eae40a1b5e0b5e9e7a25218": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_139f9e8c84f447b985c2080c17d14a8c", + "max": 85317, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1d37c4be099b44b5bc67c2cd976693b9", + "value": 85317 + } + }, + "88901ee5ca86458288bf82a22db7e379": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "88edaefb70c34854ad6f16d5a82b40e7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ba93d6edfed5434397d869ba2bd7aca5", + "IPY_MODEL_7282d9ef9a754bcfb1297081296aed5a", + "IPY_MODEL_946c67b0a3f54ce09c35db7bef44991c" ], - "source": [ - "from pandas import Series\n", - "\n", - "Series(\n", - " dict(\n", - " zero_shot_image_accuracy=zero_shot_image_accuracy,\n", - " zero_shot_text_accuracy=zero_shot_text_accuracy,\n", - " fewshot_relabelled_text_accuracy=fewshot_relabelled_text_accuracy,\n", - " )\n", - ").plot.bar()" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "machine_shape": "hm", - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": 
"Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "vscode": { - "interpreter": { - "hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "01939245f8874aaca606e7a1e8b954a8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_57b9cf8fe2fa4d918e8dc15e2efb6153", - "placeholder": "​", - "style": "IPY_MODEL_4f166794a0ad4061878ccfc623ec83c0", - "value": " 252/252 [00:02<00:00, 101.31it/s]" - } - }, - "01bc8315e7cd4064a8653eae44cea192": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0286aff9c56b43e7abd15f17dd33d6db": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "02b23c74c75e425cafe0c231b2892c47": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "02f439d46b384718b0c7ab5fd8aa2d03": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "03390d37eba24242ae6c2099d5fe7863": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", 
- "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "046bafaebfd14a6282e231c6a7c22b3e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - 
"left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "052b62b792fc4f1183ffb91f18d655c7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "05c50612c7ec41c7877d85c9d0502c0b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } - }, - "082dfef575c94327a5377c3f7278cf5a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "088db72b2c4a41adb5b6f543c4fd4070": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_47a715f99b224f41b21d9ddc7467df52", - "placeholder": "​", - "style": "IPY_MODEL_b179999ec76e48579fd195b7c788f366", - "value": "Downloading (…)okenizer_config.json: 100%" - } - }, - 
"090c3e60ff0d4e5fa930767672802d86": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "094d2d5dc22d4f93918468209abbca2e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "097dc95081d04f7e9f803384b5dee1ea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5748285ca2ba4b93b30621437e931345", - "placeholder": "​", - "style": "IPY_MODEL_3017a4909cfe49648a9478ea14b9d062", - "value": "Downloading (…)ce_transformers.json: 100%" - } - }, - "0987a64b9cea4850998e78d76d31c3b8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": 
"1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0ab25a53cd28408c802a5dcda0e1206e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": 
null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0ad0c441b68a48c0a8fb9436050d2a1d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1183c04b5df54255bad175f72d4a9153", - "IPY_MODEL_4378864bd5004edcba65b59fecbe84d3", - "IPY_MODEL_6f63b713ac2b4358b939c2818d15e67f" - ], - "layout": "IPY_MODEL_89ef6f10348a408a8546452e1fa74528" - } - }, - "0b4387ee1a0d4f769b391dcbc4d5477c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": 
null, - "visibility": null, - "width": null - } - }, - "0c28f7e5514c432197ed1b0e24fcc39b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0d3cbcf01ca14063be3ae129db78fdcb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "0d4090c9cfd542cba1a14d30b154f733": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0f590a3e9ed148c9b076fa3b16e287f8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0f993203b2004905992240787135ccb7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - 
"bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "11064b742bf34b45bbeb67040759fe94": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"1183c04b5df54255bad175f72d4a9153": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7451e0901e6f40f5971359c9ac5d47bb", - "placeholder": "​", - "style": "IPY_MODEL_fe63dbf326194c6c813327bd643a85cd", - "value": "100%" - } - }, - "11dd9c8f6fa744cfbf25ce25f3c8291b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dd355cdd1594499bbccc8a7842a0ba93", - "placeholder": "​", - "style": "IPY_MODEL_435b1f4a70694161a749af8ea40baabb", - "value": "Downloading (…)cad52eb/modules.json: 100%" - } - }, - "12da828a56fd47fbac3881a78ae3b40c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_20ba960a7c5d4a4fa43ee8c90c412b06", - "max": 690, - "min": 0, - "orientation": "horizontal", - "style": 
"IPY_MODEL_27d6c644f03a4874a0a0589cce8a9624", - "value": 690 - } - }, - "12e8ab3cc4fe40e7bbb056b3354603bc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "13942a35ebd3432d9e3a107c54af9619": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bebd64b52d24437dabeb5119235853d7", - "placeholder": "​", - "style": "IPY_MODEL_094d2d5dc22d4f93918468209abbca2e", - "value": "Downloading (…)rocessor_config.json: 100%" - } - }, - "139f9e8c84f447b985c2080c17d14a8c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "14b5a9186b3a47108fafa070c06ab35c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f7d2ce646a4c42ed893383a4f75af2d6", - "placeholder": "​", - "style": "IPY_MODEL_2a967808803e44fea67bd9958542eb43", - "value": "100%" - } - }, - "14cc3d3e93c54226bc2f38a4745c1d23": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, 
- "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "15d85f4fe15545bc85d3717d7af7bbb1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "16162c715d4745f3aa91c7f38a78a32d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "16798f3b9a934353ae8e11cdd22820ca": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0f993203b2004905992240787135ccb7", - "placeholder": "​", - "style": "IPY_MODEL_e8756ee3f2f64e01b2790714b6704584", - "value": " 604/604 [00:00<00:00, 39.1kB/s]" - } - }, - 
"16bd920f956f43b3921b79cec4d76d87": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_16fccf90f2974a938167677b189b67a2", - "placeholder": "​", - "style": "IPY_MODEL_714590bcc27e4efc837f870812929f3e", - "value": "Extracting data files: 100%" - } - }, - "16fccf90f2974a938167677b189b67a2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1a951db095d441e295bb9ff00d4dba30": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1ad0c7637293497a9a75bfa45c1a06e8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": 
null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1d37c4be099b44b5bc67c2cd976693b9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "1e2da1ce24984300af0b7246ebd59b14": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "20163aeba069412eabd3b506a339667f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b577441dde9e43cb9644244ca0cf336e", - "placeholder": "​", - "style": "IPY_MODEL_b850f11900ae4874963b35e0b86470e5", - "value": " 0/? 
[00:00<?, ? tables/s]" - } - }, - "20ba960a7c5d4a4fa43ee8c90c412b06": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "20dad493a55343efaabbc6f7c1966cf0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2108640a73e44fe28bd0445abdb2048d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_11064b742bf34b45bbeb67040759fe94", - "max": 316, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_83569f1993e44a439c4c99d5d58251f2", - "value": 316 - } - }, - "22a1ecc265c7419b99b89dc635351163": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2433cd897d734c78b3d4a62602865415": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": 
"1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2587fb3caed54a10829e8ba594510e7a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c59a11b4cdcd45a181e5417839a21245", - "placeholder": "​", - "style": "IPY_MODEL_b9850f02e9ec4069aa836d9ed11aa019", - "value": " 316/316 [00:00<00:00, 19.1kB/s]" - } - }, - "26175752e18140b4a944bfc55f77a069": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2685384c30bb4865b7f14f62fbe516a0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "27d3f1981c244549901253b3266de1ff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - 
"grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "27d6c644f03a4874a0a0589cce8a9624": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2a967808803e44fea67bd9958542eb43": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "2db4287290eb4c25bc440fc3f2f1d258": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d8f11947b0d048489bdd649479c50b1f", - "placeholder": "​", - "style": "IPY_MODEL_70eee61c4d3842f69569dee3ac8773fa", - "value": "Downloading data files: 100%" - } - }, - "2e4deb55d7b74e2fa09c9f925fd2e751": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2f093aa6d36c4622bd09d1253bf6c3d4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": 
null, - "width": null - } - }, - "2f87bde2c64b42bc90ed461265cfe1f1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2f883c076ee7470481eb7d0fcb25f685": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3017a4909cfe49648a9478ea14b9d062": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": 
"1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "34453701eb2f40df9a2a0154c02990d4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fadb08eafb084a4e91f8406055beb502", - "placeholder": "​", - "style": "IPY_MODEL_8aae0197e477451a890b1f93cdeebe10", - "value": " 34/34 [02:30<00:00, 4.40s/ex]" - } - }, - "35a4c4fa675546ccb34d38b6e1c7139c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "35c4bd29723b46ad94b098685953aefb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8c72c24c20b84bd4a007929384e91b78", - "placeholder": "​", - "style": "IPY_MODEL_515f4f3435ef468499acd8e48daf709b", - "value": " 
4.03k/4.03k [00:00<00:00, 250kB/s]" - } - }, - "35f1224563044be39e254342c6c5b635": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_87a472fb7193452faea139ab6bf1c9be", - "IPY_MODEL_eb9b58dab8044074b1cac3e39f26e561", - "IPY_MODEL_6359430089654ca4bdd587c125b317a0" - ], - "layout": "IPY_MODEL_be06bfcbe41040369d025626b8227b6b" - } - }, - "36156c14c8d544deab0ea65ac5ba7508": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"3637b3da70d34d529d48d630baeaf13b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_56d32c90b86c48b08d020ed3dcfd3a4e", - "placeholder": "​", - "style": "IPY_MODEL_2f883c076ee7470481eb7d0fcb25f685", - "value": "Downloading (…)cial_tokens_map.json: 100%" - } - }, - "375cbfe47f9f4eadaf1294be69b16dfd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_088db72b2c4a41adb5b6f543c4fd4070", - "IPY_MODEL_51f3dbca5cb04c818bb65cf4adc7914d", - "IPY_MODEL_16798f3b9a934353ae8e11cdd22820ca" - ], - "layout": "IPY_MODEL_97ff6a0b5366407c8138eedd05532d77" - } - }, - "3916f72e314d46e883db7e529210b098": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_540bda296c904aa48c10db351a11ca66", - "max": 34, - "min": 0, - "orientation": 
"horizontal", - "style": "IPY_MODEL_c1f28cff3e064e46a8c9bdd0d705a099", - "value": 34 - } - }, - "39555af32eeb44bfa49b04aa493320ba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3c370e064d684a9f94ca552bbcc893d0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b13577cd8ab54745b1f75619740f7e8d", - "max": 524619, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6b7bd8943d824ad586939a5279368a0d", - "value": 524619 - } - }, - "3c8aa1d02f1541bdb59b0a8dff27148f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_27d3f1981c244549901253b3266de1ff", - "placeholder": "​", - "style": "IPY_MODEL_9aae9636296a4245989388403103e0eb", - "value": "Downloading data: 100%" - } - }, - 
"3e50350e1d3d491ababa8cd3764be1d1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_75686e22371d4d3babec811f49d280f9", - "max": 1875, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_70d08a9270294f8380cc7c740700ca8c", - "value": 1875 - } - }, - "3e71940409664a86a2be2c0b86b10d64": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0c28f7e5514c432197ed1b0e24fcc39b", - "max": 605266175, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c41c679d7f0341f3839b9abc5329ec08", - "value": 605266175 - } - }, - "3eb4c6ae76fd4118be2e466e560f6746": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_993a0a13bc7a4deea5cded6df4e27fd1", - 
"IPY_MODEL_6d0477f7d5744f55a7882de18b923101", - "IPY_MODEL_9b506c9457f046d4a91395935a934790" - ], - "layout": "IPY_MODEL_429a122883f6429d90c994558fc50cfc" - } - }, - "3eec352c0eea47e9b6e419785750df61": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3f343b79e06e4d5d86cf0ef82729e769": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3637b3da70d34d529d48d630baeaf13b", - "IPY_MODEL_56aaec4fab6a47b18e147b2cd0c53c49", - "IPY_MODEL_a7378faa28e44fb19ecdafc7124acce2" - ], - "layout": "IPY_MODEL_b697802729d44712bfe68c0ef4f47cd8" - } - }, - "405da6f282174a00a302d69a17634ca8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "429a122883f6429d90c994558fc50cfc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "432654cc3aff41019a5df2f5dbcab6b3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - 
"max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "435b1f4a70694161a749af8ea40baabb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "435b2d9a31af4b479bca791932a9315f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - 
"width": null - } - }, - "4378864bd5004edcba65b59fecbe84d3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5d9977f50b704bd5b4a371387cc71a17", - "max": 34, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c7d383a298da44e491c81887632a89ae", - "value": 34 - } - }, - "4477e281c40c41f9a5c076e30640460e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fd56fa76c93c48b2bc179cb0a7bffac6", - "max": 122, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e444beb58b7c4ad1aa94959ef0b5bd52", - "value": 122 - } - }, - "4666546df28e4dc7b6c7d599cac5e26c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - 
"display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "47a715f99b224f41b21d9ddc7467df52": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"48a8c0d648694b8abddf07ed12b68a93": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4666546df28e4dc7b6c7d599cac5e26c", - "placeholder": "​", - "style": "IPY_MODEL_1e2da1ce24984300af0b7246ebd59b14", - "value": "Downloading (…)LIPModel/config.json: 100%" - } - }, - "48f63d3af7604e8abe4ca94dbae63f48": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "498becb6513a44a4a17cff76a8ae4666": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_5066a8fd18d94f9da0aa0cf354d13452", - "IPY_MODEL_a8d41a62f1b94575a744c12a399de8c8", - "IPY_MODEL_e8cc8dbe0d164a9eaec472ffe1351db9" - ], - "layout": "IPY_MODEL_0d3cbcf01ca14063be3ae129db78fdcb" - } - }, - "49d11634ca864c04bc87e472d3563e73": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_94502dbbc1ab42f48d2d375a8265dbb4", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_090c3e60ff0d4e5fa930767672802d86", - "value": 1 - } - }, - "49ddd099c0ed4535b2f3da0e1dc5f44b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0ab25a53cd28408c802a5dcda0e1206e", - "placeholder": "​", - "style": "IPY_MODEL_4ade3df272594fb68ae463234503c638", - "value": "Downloading 
(…)d52eb/.gitattributes: 100%" - } - }, - "4a37959a5add495981fde226b229fb19": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4ade3df272594fb68ae463234503c638": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4ae8401caf984baf9fc132490f11646a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4c46328ead39408995ba5a9837d57e94": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4c7287d821f1471e9ebdafa2f8229d28": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, 
- "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4f166794a0ad4061878ccfc623ec83c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4fc202bfa55842b48e59d4a800026331": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5066a8fd18d94f9da0aa0cf354d13452": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1a951db095d441e295bb9ff00d4dba30", - "placeholder": "​", - "style": "IPY_MODEL_e120b072886b4d4f94ef506ca3d6a605", - "value": "" - } - }, - "51307f0869b442d88a7a6b49bf7243b2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_aea6c5629724499da38f8a06667f7a2e", - "IPY_MODEL_3e50350e1d3d491ababa8cd3764be1d1", - "IPY_MODEL_79e43db8a877400198f73dbab0105ade" - ], - "layout": "IPY_MODEL_046bafaebfd14a6282e231c6a7c22b3e" - } - }, - "515f4f3435ef468499acd8e48daf709b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "51f3dbca5cb04c818bb65cf4adc7914d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_9bcb1bc9f0e2439b8754e9106eb0a802", - "max": 604, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2685384c30bb4865b7f14f62fbe516a0", - "value": 604 - } - }, - "52aa2bbbdd5246db8520ac2822fcbe54": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "52ad436eea8649058457d6380f618ea2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fcfda905fd4448b3940b04f4136c53a4", - "placeholder": "​", - 
"style": "IPY_MODEL_16162c715d4745f3aa91c7f38a78a32d", - "value": "100%" - } - }, - "534e82da7a5d47a58ae95ad7ebe1fc2e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_13942a35ebd3432d9e3a107c54af9619", - "IPY_MODEL_2108640a73e44fe28bd0445abdb2048d", - "IPY_MODEL_2587fb3caed54a10829e8ba594510e7a" - ], - "layout": "IPY_MODEL_661c517493da40f581fcceab85b5bee1" - } - }, - "540bda296c904aa48c10db351a11ca66": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null 
- } - }, - "56aaec4fab6a47b18e147b2cd0c53c49": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6f7d732ae96847809230720948fba435", - "max": 389, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c026c0d5a95d487f8315821c616d1279", - "value": 389 - } - }, - "56d32c90b86c48b08d020ed3dcfd3a4e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"5748285ca2ba4b93b30621437e931345": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "57b9cf8fe2fa4d918e8dc15e2efb6153": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": 
null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "57ed9e91eb2f45a18ee08f6989bec0a4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5817239aa98046e9be3736688f8c1a5f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4c7287d821f1471e9ebdafa2f8229d28", - "max": 3, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5a65206e09c04176bfd33bcb31cd1ad5", - "value": 3 - } - }, - "5a65206e09c04176bfd33bcb31cd1ad5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5c634542ed3940a1a8ac3f73b634e88d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_de16a90176c24ff8aa76eb0af1281d22", - "placeholder": "​", - "style": "IPY_MODEL_c9464f1ec9e64361b0e621c664514ada", - "value": "Epoch: 100%" - } - }, - "5d9977f50b704bd5b4a371387cc71a17": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, 
- "top": null, - "visibility": null, - "width": null - } - }, - "5e8c1c4d99654ff1b32732a7297871f7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "61e53dcae1c4457aa5089ae1e8d276ae": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a5a67282d5b14d3f803347ddd2ccc44f", - "placeholder": "​", - "style": "IPY_MODEL_b5a4880486774113bef9fc78d7bb4ed9", - "value": "100%" - } - }, - "6359430089654ca4bdd587c125b317a0": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e8e295d7292245fb9f17413599d59960", - "placeholder": "​", - "style": "IPY_MODEL_0286aff9c56b43e7abd15f17dd33d6db", - "value": " 85.3k/85.3k [00:00<00:00, 286kB/s]" - } - }, - "63de802f18e44b16a3a2f5029ad7b705": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9e17b92da1aa43d0aa980db275437fde", - "placeholder": "​", - "style": "IPY_MODEL_d2186c7599164e528aa3d92f6999a9bb", - "value": " 3/3 [00:05<00:00, 1.70s/it]" - } - }, - "6482c21eb5d449f795a9e39fcae6a46e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2db4287290eb4c25bc440fc3f2f1d258", - "IPY_MODEL_b470309f2a6b4878852ae9e589764edf", - "IPY_MODEL_63de802f18e44b16a3a2f5029ad7b705" - ], - "layout": "IPY_MODEL_670f91d2101c4d87997c228210646f89" - } - }, - 
"661c517493da40f581fcceab85b5bee1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "670f91d2101c4d87997c228210646f89": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": 
null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "68f83424c07844f1b439cf9a2170818b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6b7bd8943d824ad586939a5279368a0d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6d0477f7d5744f55a7882de18b923101": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e94edd188d8942e6b7f01cb1b962ba46", - "max": 315, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_7059d83f776f49c887879e870bb71ee8", - "value": 315 - } - }, - "6f63b713ac2b4358b939c2818d15e67f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4c46328ead39408995ba5a9837d57e94", - "placeholder": "​", - "style": "IPY_MODEL_35a4c4fa675546ccb34d38b6e1c7139c", - "value": " 34/34 [01:36<00:00, 2.65s/ex]" - } - }, - "6f7d732ae96847809230720948fba435": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": 
"1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7059d83f776f49c887879e870bb71ee8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "707c06347f5444e185ca2bc7c07eecfa": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": 
null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "70d08a9270294f8380cc7c740700ca8c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "70eee61c4d3842f69569dee3ac8773fa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "71181e8f913b412fb72917c55a151723": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_735a35f35dee4d548cd6284f9e3e1f5a", - "placeholder": "​", - "style": "IPY_MODEL_b7dfa3f0d2374482af6dc25ec8dd65df", - "value": "Downloading data: 100%" - } - }, - "713939a800704d4a8e95217c4b3c6d0e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ac8f808451764c4aac75914710521941", - "IPY_MODEL_3c370e064d684a9f94ca552bbcc893d0", - "IPY_MODEL_a5fc404735384abba31ed330ad4d4985" - ], - "layout": "IPY_MODEL_f6e1dadaed924874a4c12e4759921e36" - } - }, - "714590bcc27e4efc837f870812929f3e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "71f5c4292d4f4564bb0bf5af9368163f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "7282d9ef9a754bcfb1297081296aed5a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c4a54f5262014b77964d42429d5e43bf", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cf540757bc79428bab170e157eb9381f", - "value": 1 - } - }, - "735a35f35dee4d548cd6284f9e3e1f5a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "742743267970469b8b358f051aeb3d7f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - 
"state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "7451e0901e6f40f5971359c9ac5d47bb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "74cce0696fd74f999b8476974ec74f30": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, 
- "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "74d53a9e164f492785463bd1cdb376a7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_22a1ecc265c7419b99b89dc635351163", - "max": 4025, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_405da6f282174a00a302d69a17634ca8", - "value": 4025 - } - }, - "75686e22371d4d3babec811f49d280f9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - 
"border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "76252be98eed408a8f08fab0edca8d86": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_5c634542ed3940a1a8ac3f73b634e88d", - "IPY_MODEL_b0721a5abbc044a59a9d119c65523ec3", - "IPY_MODEL_822cf5f25bbb4845a8df5b09df0c88f5" - ], - "layout": "IPY_MODEL_d24e98b7ba64420588aeee6c0a7d787a" - } - }, - "77131a41265342a28468664ad067d4c8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "79e43db8a877400198f73dbab0105ade": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_02f439d46b384718b0c7ab5fd8aa2d03", - "placeholder": "​", - "style": "IPY_MODEL_742743267970469b8b358f051aeb3d7f", - "value": " 1.88k/1.88k [00:00<00:00, 114kB/s]" - } - }, - "7b3978443935464b9a1352ef3bd2d415": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7d508b60c1f44aecb9e3f9711ecc0ce8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": 
"LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "820351c564124d7d871dc17681b747e9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7b3978443935464b9a1352ef3bd2d415", - "placeholder": "​", - "style": "IPY_MODEL_ec25ea55d62e4c0a9443d91d015bdbd6", - "value": " 605M/605M [00:04<00:00, 153MB/s]" - } - }, - "822cf5f25bbb4845a8df5b09df0c88f5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e831d4f1ad4d4f5f97bc9569a5294ca9", - "placeholder": "​", - "style": "IPY_MODEL_71f5c4292d4f4564bb0bf5af9368163f", - "value": " 1/1 [00:57<00:00, 57.78s/it]" - } - }, - "8294163893bc40c2bef26032babb3938": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "83569f1993e44a439c4c99d5d58251f2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8364e4e977ad41fab9688c5a7443f276": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_11dd9c8f6fa744cfbf25ce25f3c8291b", - "IPY_MODEL_4477e281c40c41f9a5c076e30640460e", - "IPY_MODEL_b800cade432f484ab1619620ba8a93b8" - ], - "layout": "IPY_MODEL_b57479b0aab44fae95c28b59905e053c" - } - }, - "84d1a26c298b42cd820bcd2e4302a443": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8513e53c07c04d75999be1fec6d5b491": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "857c37c3638b4c7d8ffaaaa9fe5932f0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", 
- "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "87a472fb7193452faea139ab6bf1c9be": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_082dfef575c94327a5377c3f7278cf5a", - "placeholder": "​", - "style": "IPY_MODEL_84d1a26c298b42cd820bcd2e4302a443", - "value": "Downloading data: 100%" - } - }, - "88677a526eae40a1b5e0b5e9e7a25218": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_139f9e8c84f447b985c2080c17d14a8c", - "max": 85317, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1d37c4be099b44b5bc67c2cd976693b9", - "value": 85317 - } - }, - "88901ee5ca86458288bf82a22db7e379": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "88edaefb70c34854ad6f16d5a82b40e7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ba93d6edfed5434397d869ba2bd7aca5", - "IPY_MODEL_7282d9ef9a754bcfb1297081296aed5a", - "IPY_MODEL_946c67b0a3f54ce09c35db7bef44991c" - ], - "layout": "IPY_MODEL_f41cf420defb4b109f836fecea8e1d15" - } - }, - "89b04536243a48049eb777f00fe49555": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "89ef6f10348a408a8546452e1fa74528": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - 
"grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8aae0197e477451a890b1f93cdeebe10": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8b161e12449f4078bb64245518bb596d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_71181e8f913b412fb72917c55a151723", - "IPY_MODEL_dcd3ba169df34a0395eb5d03405f8835", - "IPY_MODEL_d0b05902a4db4abd8c10cab6178a59ec" - ], - "layout": "IPY_MODEL_8294163893bc40c2bef26032babb3938" - } - }, - "8c72c24c20b84bd4a007929384e91b78": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8ce3c14efb4a4a1983c6cfbba711b26a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_097dc95081d04f7e9f803384b5dee1ea", - "IPY_MODEL_ea9794429bbc4ae98e258157eb4d317e", - "IPY_MODEL_92a14039453e4551b30b5d7f9f8e455d" - ], - "layout": "IPY_MODEL_5e8c1c4d99654ff1b32732a7297871f7" - } - }, - "908b3e162c9b4fe1b4fa35d5b928ecff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "92a14039453e4551b30b5d7f9f8e455d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_68f83424c07844f1b439cf9a2170818b", - "placeholder": "​", - "style": "IPY_MODEL_4a37959a5add495981fde226b229fb19", - "value": " 116/116 [00:00<00:00, 6.91kB/s]" - } - }, - "9411b438e2634a3ebd0d1617d2ec3a7b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "94502dbbc1ab42f48d2d375a8265dbb4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, 
- "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "20px" - } - }, - "946c67b0a3f54ce09c35db7bef44991c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_03390d37eba24242ae6c2099d5fe7863", - "placeholder": "​", - "style": "IPY_MODEL_88901ee5ca86458288bf82a22db7e379", - "value": " 0/? [00:00<?, ? tables/s]" - } - }, - "9623a214fa5048c38dda05120d5573c3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - 
"width": null - } - }, - "963fbb3d5b9f4848a8cadf2a2d2b264b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_52ad436eea8649058457d6380f618ea2", - "IPY_MODEL_5817239aa98046e9be3736688f8c1a5f", - "IPY_MODEL_c48e02dc87d74d50ba6de4aefeb1441e" - ], - "layout": "IPY_MODEL_ecc225aab6804d97b1cdd383621c9695" - } - }, - "9669af3f853d454bbcb5530acf0fd5b5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "97ff6a0b5366407c8138eedd05532d77": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9808aab644d7459aaa5504bef2da9652": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "993a0a13bc7a4deea5cded6df4e27fd1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9411b438e2634a3ebd0d1617d2ec3a7b", - "placeholder": "​", - "style": "IPY_MODEL_ffbfe6debabc4f5c8cad8383d596946e", - "value": "Iteration: 100%" - } - }, - "9a47ab1358b341dda7fba4031bea3bb3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9aae9636296a4245989388403103e0eb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "description_width": "" - } - }, - "9ac85729093c41c1b2a0fef5191b8ab5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9b506c9457f046d4a91395935a934790": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a696789fe05c40b281a0090625e66004", - "placeholder": "​", - "style": "IPY_MODEL_02b23c74c75e425cafe0c231b2892c47", - "value": " 315/315 [00:57<00:00, 5.36it/s]" - } - }, - 
"9b783cfaae714956ad6c0ee4bc7f7e23": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_908b3e162c9b4fe1b4fa35d5b928ecff", - "placeholder": "​", - "style": "IPY_MODEL_dc43386a12c642cf93f434dd3f5cc04c", - "value": "Downloading (…)"pytorch_model.bin";: 100%" - } - }, - "9bcb1bc9f0e2439b8754e9106eb0a802": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9cef8cafa31a47fd9f7410478b5415e8": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9e17b92da1aa43d0aa980db275437fde": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9e98837e01004eba948e9c2d3ea23e5f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9f38a9239dc8476b81c23b79282ebbf5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9fac34a2d3e44c95a2591b26ddae5c1e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a07c719a0ffc41f9a0e8227562dc69d1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_dc03df51df3845edac3492bc3ee3391e", - "IPY_MODEL_ebf96889240d44f0b5a16fd945090213", - "IPY_MODEL_c011f7d4aa7b4f66997d1c88804afacf" - ], - "layout": "IPY_MODEL_dcadb8f0088d43f6bc1a2cdc72cff730" - } - }, - "a1ec5941f87f4b23bc72188313ac9275": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a5a67282d5b14d3f803347ddd2ccc44f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a5fc404735384abba31ed330ad4d4985": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9623a214fa5048c38dda05120d5573c3", - "placeholder": "​", - "style": "IPY_MODEL_9a47ab1358b341dda7fba4031bea3bb3", - "value": " 525k/525k [00:00<00:00, 1.23MB/s]" - } - }, - "a696789fe05c40b281a0090625e66004": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a6b61668ad1146e78c2a9b2556b23703": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2433cd897d734c78b3d4a62602865415", - "placeholder": "​", - "style": "IPY_MODEL_9fac34a2d3e44c95a2591b26ddae5c1e", - "value": " 3/3 [00:00<00:00, 81.17it/s]" - } - }, - "a7378faa28e44fb19ecdafc7124acce2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9669af3f853d454bbcb5530acf0fd5b5", - "placeholder": "​", - "style": "IPY_MODEL_9f38a9239dc8476b81c23b79282ebbf5", - "value": " 389/389 [00:00<00:00, 25.7kB/s]" - } - }, - "a8d41a62f1b94575a744c12a399de8c8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d5fea5e89ebd4d298c169526eadeeb32", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8513e53c07c04d75999be1fec6d5b491", - "value": 1 - } - }, - "a8ec0defdfe841caaf0caa4de07fb84a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_14b5a9186b3a47108fafa070c06ab35c", - "IPY_MODEL_3916f72e314d46e883db7e529210b098", - "IPY_MODEL_34453701eb2f40df9a2a0154c02990d4" - ], - "layout": "IPY_MODEL_9e98837e01004eba948e9c2d3ea23e5f" - } - }, - "ac546920996e4ca081e438999ae19fc8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b91ae0e5a43143e68f53099e85f1afc2", - "placeholder": "​", - "style": "IPY_MODEL_a1ec5941f87f4b23bc72188313ac9275", - "value": " 1.11k/1.11k [00:00<00:00, 74.9kB/s]" - } - }, - "ac8f808451764c4aac75914710521941": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9808aab644d7459aaa5504bef2da9652", - "placeholder": "​", - "style": "IPY_MODEL_12e8ab3cc4fe40e7bbb056b3354603bc", - "value": "Downloading (…)CLIPModel/merges.txt: 100%" - } - }, - "aea6c5629724499da38f8a06667f7a2e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_05c50612c7ec41c7877d85c9d0502c0b", - "placeholder": "​", - "style": "IPY_MODEL_0f590a3e9ed148c9b076fa3b16e287f8", - "value": "Downloading (…)859cad52eb/README.md: 100%" - } - }, - "aedab95f1bb144c38180f2b2e71c14ff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": 
[], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e97c5c6e969842468f53520dfba1ee8f", - "placeholder": "​", - "style": "IPY_MODEL_89b04536243a48049eb777f00fe49555", - "value": "" - } - }, - "b0721a5abbc044a59a9d119c65523ec3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_74cce0696fd74f999b8476974ec74f30", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cad144a0ae5948f5896d12f83c044dfd", - "value": 1 - } - }, - "b13577cd8ab54745b1f75619740f7e8d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b179999ec76e48579fd195b7c788f366": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b3c787a117ae42738c315c5cdabd508c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b470309f2a6b4878852ae9e589764edf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_26175752e18140b4a944bfc55f77a069", - "max": 3, - "min": 0, - 
"orientation": "horizontal", - "style": "IPY_MODEL_c13ea0c47f6e48f0a804fa9f2f5ce544", - "value": 3 - } - }, - "b51a6ffb9b5940cc90cf1ff5ed32d30d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_61e53dcae1c4457aa5089ae1e8d276ae", - "IPY_MODEL_f8916e9e4c5c4393b0f2c2f77d055757", - "IPY_MODEL_01939245f8874aaca606e7a1e8b954a8" - ], - "layout": "IPY_MODEL_f2cdef43d919412698f85ea03ce79e99" - } - }, - "b57479b0aab44fae95c28b59905e053c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "b577441dde9e43cb9644244ca0cf336e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b5a4880486774113bef9fc78d7bb4ed9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b697802729d44712bfe68c0ef4f47cd8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b73b1442ea8c42da8342e62bea1a8d6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c66c4c5330724c27966b24cfdc87dc3f", - "max": 961143, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0d4090c9cfd542cba1a14d30b154f733", - "value": 961143 - } - }, - "b73e9fcc9e634c2780cc880cc65cc678": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": 
"LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b77605d5afb94b8cb579f3c690ea4203": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_aedab95f1bb144c38180f2b2e71c14ff", - "IPY_MODEL_49d11634ca864c04bc87e472d3563e73", - "IPY_MODEL_20163aeba069412eabd3b506a339667f" - ], - "layout": "IPY_MODEL_feef17ea90ae4105b2ffb56841ed2adb" - } - }, - "b7dfa3f0d2374482af6dc25ec8dd65df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b800cade432f484ab1619620ba8a93b8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_52aa2bbbdd5246db8520ac2822fcbe54", - "placeholder": "​", - "style": "IPY_MODEL_39555af32eeb44bfa49b04aa493320ba", - "value": " 122/122 [00:00<00:00, 7.68kB/s]" - } - }, - "b850f11900ae4874963b35e0b86470e5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b8da2402e8e245d2b5ff592f602f138e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b91ae0e5a43143e68f53099e85f1afc2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b9850f02e9ec4069aa836d9ed11aa019": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ba93d6edfed5434397d869ba2bd7aca5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_c10b310f7f6344b39c8d165c887d28f1", - "placeholder": "​", - "style": "IPY_MODEL_052b62b792fc4f1183ffb91f18d655c7", - "value": "" - } - }, - "bbb9442a3ec1463790bcf6533f3be905": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_16bd920f956f43b3921b79cec4d76d87", - "IPY_MODEL_c12bc29016124339a6c3cb11f21bfaf0", - "IPY_MODEL_a6b61668ad1146e78c2a9b2556b23703" - ], - "layout": "IPY_MODEL_14cc3d3e93c54226bc2f38a4745c1d23" - } - }, - "bcfc02b30e024b5c81856bcd9e191d07": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f4d6914361f44912b0d7ca1b16496606", - "placeholder": "​", - "style": "IPY_MODEL_b3c787a117ae42738c315c5cdabd508c", - "value": "Downloading: 100%" - } - }, - "be06bfcbe41040369d025626b8227b6b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": 
null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "be63efa842e7438ab1fc2f4eb2ceb55f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2f093aa6d36c4622bd09d1253bf6c3d4", - "placeholder": "​", - "style": "IPY_MODEL_c13b9d0a74f540d5ab6c01064c149a65", - "value": " 690/690 [00:00<00:00, 45.7kB/s]" - } - }, - "bebd64b52d24437dabeb5119235853d7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": 
null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c011f7d4aa7b4f66997d1c88804afacf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0b4387ee1a0d4f769b391dcbc4d5477c", - "placeholder": "​", - "style": "IPY_MODEL_b8da2402e8e245d2b5ff592f602f138e", - "value": " 252/252 [00:02<00:00, 113.35it/s]" - } - }, - "c026c0d5a95d487f8315821c616d1279": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c10b310f7f6344b39c8d165c887d28f1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c11880f9dbf24e7b97a4a6e6c45d27bd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9b783cfaae714956ad6c0ee4bc7f7e23", - "IPY_MODEL_3e71940409664a86a2be2c0b86b10d64", - "IPY_MODEL_820351c564124d7d871dc17681b747e9" - ], - "layout": "IPY_MODEL_432654cc3aff41019a5df2f5dbcab6b3" - } - }, - "c12bc29016124339a6c3cb11f21bfaf0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9cef8cafa31a47fd9f7410478b5415e8", - "max": 3, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2e4deb55d7b74e2fa09c9f925fd2e751", - "value": 3 - } - }, - "c13b9d0a74f540d5ab6c01064c149a65": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c13ea0c47f6e48f0a804fa9f2f5ce544": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c1f28cff3e064e46a8c9bdd0d705a099": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c41c679d7f0341f3839b9abc5329ec08": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c48e02dc87d74d50ba6de4aefeb1441e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9ac85729093c41c1b2a0fef5191b8ab5", - "placeholder": "​", - "style": "IPY_MODEL_d774f350171b4c3fb6be768dbf11c726", - "value": " 3/3 [00:00<00:00, 125.78it/s]" - } - }, - "c4a54f5262014b77964d42429d5e43bf": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - 
"left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "20px" - } - }, - "c54e39be37434e0d826d86041ef1de96": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c59a11b4cdcd45a181e5417839a21245": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c5fecfc9041049e7834ae864bd54d59e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": 
null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c65f5b1c90e24d47a060c0b362749b82": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c66c4c5330724c27966b24cfdc87dc3f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - 
"grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c7d383a298da44e491c81887632a89ae": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c9464f1ec9e64361b0e621c664514ada": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cad144a0ae5948f5896d12f83c044dfd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } 
- }, - "cf31d13e5ccd4598bdf3353d1d914874": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e110392a48e446449f36eb31d42ba380", - "placeholder": "​", - "style": "IPY_MODEL_3eec352c0eea47e9b6e419785750df61", - "value": " 961k/961k [00:00<00:00, 2.26MB/s]" - } - }, - "cf540757bc79428bab170e157eb9381f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d0b05902a4db4abd8c10cab6178a59ec": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_36156c14c8d544deab0ea65ac5ba7508", - "placeholder": "​", - "style": "IPY_MODEL_15d85f4fe15545bc85d3717d7af7bbb1", - "value": " 60.7k/60.7k [00:00<00:00, 685kB/s]" - } - }, - "d2186c7599164e528aa3d92f6999a9bb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d24e98b7ba64420588aeee6c0a7d787a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d2cf4d1133a2421aa1ec980bdca4fecd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": 
"HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_bcfc02b30e024b5c81856bcd9e191d07", - "IPY_MODEL_eaefc3e261a7453cb66905079b71f16a", - "IPY_MODEL_ac546920996e4ca081e438999ae19fc8" - ], - "layout": "IPY_MODEL_c54e39be37434e0d826d86041ef1de96" - } - }, - "d39695c4fadd4bdb93756645a3c3a1e0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e5d4a32f5b384dbaa0e99ff1b925836a", - "IPY_MODEL_b73b1442ea8c42da8342e62bea1a8d6f", - "IPY_MODEL_cf31d13e5ccd4598bdf3353d1d914874" - ], - "layout": "IPY_MODEL_2f87bde2c64b42bc90ed461265cfe1f1" - } - }, - "d5daacab713847ef98afd7a77fbd613c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d5fea5e89ebd4d298c169526eadeeb32": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "20px" - } - }, - "d774f350171b4c3fb6be768dbf11c726": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d8f11947b0d048489bdd649479c50b1f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - 
"justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d9c6dc4bccfb4e57a9ffcef2f9269172": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_48a8c0d648694b8abddf07ed12b68a93", - "IPY_MODEL_74d53a9e164f492785463bd1cdb376a7", - "IPY_MODEL_35c4bd29723b46ad94b098685953aefb" - ], - "layout": "IPY_MODEL_7d508b60c1f44aecb9e3f9711ecc0ce8" - } - }, - "dc03df51df3845edac3492bc3ee3391e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b73e9fcc9e634c2780cc880cc65cc678", - "placeholder": "​", - "style": "IPY_MODEL_57ed9e91eb2f45a18ee08f6989bec0a4", - "value": "100%" - } - }, - "dc43386a12c642cf93f434dd3f5cc04c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "dcadb8f0088d43f6bc1a2cdc72cff730": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dcd3ba169df34a0395eb5d03405f8835": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_c65f5b1c90e24d47a060c0b362749b82", - "max": 60722, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_01bc8315e7cd4064a8653eae44cea192", - "value": 60722 - } - }, - "dd355cdd1594499bbccc8a7842a0ba93": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "de16a90176c24ff8aa76eb0af1281d22": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, 
- "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e110392a48e446449f36eb31d42ba380": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e120b072886b4d4f94ef506ca3d6a605": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e145a20bac85423a8ac6a1841f7ff4d7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e444beb58b7c4ad1aa94959ef0b5bd52": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e5d4a32f5b384dbaa0e99ff1b925836a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f305b7e403a647379268573a95d22f92", - "placeholder": "​", - "style": "IPY_MODEL_d5daacab713847ef98afd7a77fbd613c", - 
"value": "Downloading (…)CLIPModel/vocab.json: 100%" - } - }, - "e7da6c65cec140c3801fca68bcfdebcc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e831d4f1ad4d4f5f97bc9569a5294ca9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e8756ee3f2f64e01b2790714b6704584": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e8cc8dbe0d164a9eaec472ffe1351db9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e7da6c65cec140c3801fca68bcfdebcc", - "placeholder": "​", - "style": "IPY_MODEL_eccef3d9147242d9a1e42f4abd0ebe4c", - "value": " 0/? [00:00<?, ? 
tables/s]" - } - }, - "e8e295d7292245fb9f17413599d59960": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e94edd188d8942e6b7f01cb1b962ba46": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - 
"grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e97c5c6e969842468f53520dfba1ee8f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ea9794429bbc4ae98e258157eb4d317e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c5fecfc9041049e7834ae864bd54d59e", - "max": 116, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_857c37c3638b4c7d8ffaaaa9fe5932f0", - "value": 116 - } - }, - "eaefc3e261a7453cb66905079b71f16a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_edf41d1718984e7dbef2b65799935fd8", - "max": 1109, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_20dad493a55343efaabbc6f7c1966cf0", - "value": 1109 - } - }, - "eaf5ad9e9e3240799128ef8bbf0a195a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1ad0c7637293497a9a75bfa45c1a06e8", - "placeholder": "​", - "style": "IPY_MODEL_4fc202bfa55842b48e59d4a800026331", - "value": " 85.3k/85.3k [00:00<00:00, 343kB/s]" - } - }, - "eb9b58dab8044074b1cac3e39f26e561": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0987a64b9cea4850998e78d76d31c3b8", - "max": 85317, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_fa0573f2ebd84a5ba5ebdc0452362887", - "value": 85317 - } - }, - "ebf96889240d44f0b5a16fd945090213": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ae8401caf984baf9fc132490f11646a", - "max": 252, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e145a20bac85423a8ac6a1841f7ff4d7", - "value": 252 - } - }, - "ec25ea55d62e4c0a9443d91d015bdbd6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ecc225aab6804d97b1cdd383621c9695": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": 
{ - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "eccef3d9147242d9a1e42f4abd0ebe4c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ed19b6e281f145af9b6df5eb783e9a95": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": 
"", - "children": [ - "IPY_MODEL_49ddd099c0ed4535b2f3da0e1dc5f44b", - "IPY_MODEL_12da828a56fd47fbac3881a78ae3b40c", - "IPY_MODEL_be63efa842e7438ab1fc2f4eb2ceb55f" - ], - "layout": "IPY_MODEL_435b2d9a31af4b479bca791932a9315f" - } - }, - "edf41d1718984e7dbef2b65799935fd8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f2cdef43d919412698f85ea03ce79e99": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - 
"display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f305b7e403a647379268573a95d22f92": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"f41cf420defb4b109f836fecea8e1d15": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "f4d6914361f44912b0d7ca1b16496606": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - 
"grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f6e1dadaed924874a4c12e4759921e36": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f7d2ce646a4c42ed893383a4f75af2d6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f8916e9e4c5c4393b0f2c2f77d055757": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_707c06347f5444e185ca2bc7c07eecfa", - "max": 252, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_77131a41265342a28468664ad067d4c8", - "value": 252 - } - }, - "f8d66c452a6d4347b9a8247ad135fd35": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", 
- "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3c8aa1d02f1541bdb59b0a8dff27148f", - "IPY_MODEL_88677a526eae40a1b5e0b5e9e7a25218", - "IPY_MODEL_eaf5ad9e9e3240799128ef8bbf0a195a" - ], - "layout": "IPY_MODEL_48f63d3af7604e8abe4ca94dbae63f48" - } - }, - "fa0573f2ebd84a5ba5ebdc0452362887": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "fadb08eafb084a4e91f8406055beb502": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - 
"overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fcfda905fd4448b3940b04f4136c53a4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fd56fa76c93c48b2bc179cb0a7bffac6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": 
null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fe63dbf326194c6c813327bd643a85cd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "feef17ea90ae4105b2ffb56841ed2adb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - 
"left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "ffbfe6debabc4f5c8cad8383d596946e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } + "layout": "IPY_MODEL_f41cf420defb4b109f836fecea8e1d15" + } + }, + "89b04536243a48049eb777f00fe49555": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "89ef6f10348a408a8546452e1fa74528": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8aae0197e477451a890b1f93cdeebe10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8b161e12449f4078bb64245518bb596d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_71181e8f913b412fb72917c55a151723", + "IPY_MODEL_dcd3ba169df34a0395eb5d03405f8835", + "IPY_MODEL_d0b05902a4db4abd8c10cab6178a59ec" + ], + "layout": "IPY_MODEL_8294163893bc40c2bef26032babb3938" + } + }, + "8c72c24c20b84bd4a007929384e91b78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ce3c14efb4a4a1983c6cfbba711b26a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_097dc95081d04f7e9f803384b5dee1ea", + "IPY_MODEL_ea9794429bbc4ae98e258157eb4d317e", + "IPY_MODEL_92a14039453e4551b30b5d7f9f8e455d" + ], + "layout": "IPY_MODEL_5e8c1c4d99654ff1b32732a7297871f7" + } + }, + "908b3e162c9b4fe1b4fa35d5b928ecff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "92a14039453e4551b30b5d7f9f8e455d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_68f83424c07844f1b439cf9a2170818b", + "placeholder": "​", + "style": "IPY_MODEL_4a37959a5add495981fde226b229fb19", + "value": " 116/116 [00:00<00:00, 6.91kB/s]" + } + }, + "9411b438e2634a3ebd0d1617d2ec3a7b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "94502dbbc1ab42f48d2d375a8265dbb4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, 
+ "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "946c67b0a3f54ce09c35db7bef44991c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03390d37eba24242ae6c2099d5fe7863", + "placeholder": "​", + "style": "IPY_MODEL_88901ee5ca86458288bf82a22db7e379", + "value": " 0/? [00:00<?, ? tables/s]" + } + }, + "9623a214fa5048c38dda05120d5573c3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + 
"width": null + } + }, + "963fbb3d5b9f4848a8cadf2a2d2b264b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_52ad436eea8649058457d6380f618ea2", + "IPY_MODEL_5817239aa98046e9be3736688f8c1a5f", + "IPY_MODEL_c48e02dc87d74d50ba6de4aefeb1441e" + ], + "layout": "IPY_MODEL_ecc225aab6804d97b1cdd383621c9695" + } + }, + "9669af3f853d454bbcb5530acf0fd5b5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "97ff6a0b5366407c8138eedd05532d77": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9808aab644d7459aaa5504bef2da9652": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "993a0a13bc7a4deea5cded6df4e27fd1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9411b438e2634a3ebd0d1617d2ec3a7b", + "placeholder": "​", + "style": "IPY_MODEL_ffbfe6debabc4f5c8cad8383d596946e", + "value": "Iteration: 100%" + } + }, + "9a47ab1358b341dda7fba4031bea3bb3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9aae9636296a4245989388403103e0eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "9ac85729093c41c1b2a0fef5191b8ab5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9b506c9457f046d4a91395935a934790": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a696789fe05c40b281a0090625e66004", + "placeholder": "​", + "style": "IPY_MODEL_02b23c74c75e425cafe0c231b2892c47", + "value": " 315/315 [00:57<00:00, 5.36it/s]" + } + }, + 
"9b783cfaae714956ad6c0ee4bc7f7e23": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_908b3e162c9b4fe1b4fa35d5b928ecff", + "placeholder": "​", + "style": "IPY_MODEL_dc43386a12c642cf93f434dd3f5cc04c", + "value": "Downloading (…)"pytorch_model.bin";: 100%" + } + }, + "9bcb1bc9f0e2439b8754e9106eb0a802": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9cef8cafa31a47fd9f7410478b5415e8": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9e17b92da1aa43d0aa980db275437fde": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9e98837e01004eba948e9c2d3ea23e5f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9f38a9239dc8476b81c23b79282ebbf5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9fac34a2d3e44c95a2591b26ddae5c1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a07c719a0ffc41f9a0e8227562dc69d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_dc03df51df3845edac3492bc3ee3391e", + "IPY_MODEL_ebf96889240d44f0b5a16fd945090213", + "IPY_MODEL_c011f7d4aa7b4f66997d1c88804afacf" + ], + "layout": "IPY_MODEL_dcadb8f0088d43f6bc1a2cdc72cff730" + } + }, + "a1ec5941f87f4b23bc72188313ac9275": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a5a67282d5b14d3f803347ddd2ccc44f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5fc404735384abba31ed330ad4d4985": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9623a214fa5048c38dda05120d5573c3", + "placeholder": "​", + "style": "IPY_MODEL_9a47ab1358b341dda7fba4031bea3bb3", + "value": " 525k/525k [00:00<00:00, 1.23MB/s]" + } + }, + "a696789fe05c40b281a0090625e66004": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6b61668ad1146e78c2a9b2556b23703": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2433cd897d734c78b3d4a62602865415", + "placeholder": "​", + "style": "IPY_MODEL_9fac34a2d3e44c95a2591b26ddae5c1e", + "value": " 3/3 [00:00<00:00, 81.17it/s]" + } + }, + "a7378faa28e44fb19ecdafc7124acce2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9669af3f853d454bbcb5530acf0fd5b5", + "placeholder": "​", + "style": "IPY_MODEL_9f38a9239dc8476b81c23b79282ebbf5", + "value": " 389/389 [00:00<00:00, 25.7kB/s]" + } + }, + "a8d41a62f1b94575a744c12a399de8c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "info", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d5fea5e89ebd4d298c169526eadeeb32", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8513e53c07c04d75999be1fec6d5b491", + "value": 1 + } + }, + "a8ec0defdfe841caaf0caa4de07fb84a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_14b5a9186b3a47108fafa070c06ab35c", + "IPY_MODEL_3916f72e314d46e883db7e529210b098", + "IPY_MODEL_34453701eb2f40df9a2a0154c02990d4" + ], + "layout": "IPY_MODEL_9e98837e01004eba948e9c2d3ea23e5f" + } + }, + "ac546920996e4ca081e438999ae19fc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b91ae0e5a43143e68f53099e85f1afc2", + "placeholder": "​", + "style": "IPY_MODEL_a1ec5941f87f4b23bc72188313ac9275", + "value": " 1.11k/1.11k [00:00<00:00, 74.9kB/s]" + } + }, + "ac8f808451764c4aac75914710521941": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9808aab644d7459aaa5504bef2da9652", + "placeholder": "​", + "style": "IPY_MODEL_12e8ab3cc4fe40e7bbb056b3354603bc", + "value": "Downloading (…)CLIPModel/merges.txt: 100%" + } + }, + "aea6c5629724499da38f8a06667f7a2e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_05c50612c7ec41c7877d85c9d0502c0b", + "placeholder": "​", + "style": "IPY_MODEL_0f590a3e9ed148c9b076fa3b16e287f8", + "value": "Downloading (…)859cad52eb/README.md: 100%" + } + }, + "aedab95f1bb144c38180f2b2e71c14ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": 
[], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e97c5c6e969842468f53520dfba1ee8f", + "placeholder": "​", + "style": "IPY_MODEL_89b04536243a48049eb777f00fe49555", + "value": "" + } + }, + "b0721a5abbc044a59a9d119c65523ec3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_74cce0696fd74f999b8476974ec74f30", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cad144a0ae5948f5896d12f83c044dfd", + "value": 1 + } + }, + "b13577cd8ab54745b1f75619740f7e8d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b179999ec76e48579fd195b7c788f366": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b3c787a117ae42738c315c5cdabd508c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b470309f2a6b4878852ae9e589764edf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_26175752e18140b4a944bfc55f77a069", + "max": 3, + "min": 0, + 
"orientation": "horizontal", + "style": "IPY_MODEL_c13ea0c47f6e48f0a804fa9f2f5ce544", + "value": 3 + } + }, + "b51a6ffb9b5940cc90cf1ff5ed32d30d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_61e53dcae1c4457aa5089ae1e8d276ae", + "IPY_MODEL_f8916e9e4c5c4393b0f2c2f77d055757", + "IPY_MODEL_01939245f8874aaca606e7a1e8b954a8" + ], + "layout": "IPY_MODEL_f2cdef43d919412698f85ea03ce79e99" + } + }, + "b57479b0aab44fae95c28b59905e053c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "b577441dde9e43cb9644244ca0cf336e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5a4880486774113bef9fc78d7bb4ed9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b697802729d44712bfe68c0ef4f47cd8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b73b1442ea8c42da8342e62bea1a8d6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c66c4c5330724c27966b24cfdc87dc3f", + "max": 961143, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0d4090c9cfd542cba1a14d30b154f733", + "value": 961143 + } + }, + "b73e9fcc9e634c2780cc880cc65cc678": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b77605d5afb94b8cb579f3c690ea4203": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aedab95f1bb144c38180f2b2e71c14ff", + "IPY_MODEL_49d11634ca864c04bc87e472d3563e73", + "IPY_MODEL_20163aeba069412eabd3b506a339667f" + ], + "layout": "IPY_MODEL_feef17ea90ae4105b2ffb56841ed2adb" + } + }, + "b7dfa3f0d2374482af6dc25ec8dd65df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b800cade432f484ab1619620ba8a93b8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_52aa2bbbdd5246db8520ac2822fcbe54", + "placeholder": "​", + "style": "IPY_MODEL_39555af32eeb44bfa49b04aa493320ba", + "value": " 122/122 [00:00<00:00, 7.68kB/s]" + } + }, + "b850f11900ae4874963b35e0b86470e5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b8da2402e8e245d2b5ff592f602f138e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b91ae0e5a43143e68f53099e85f1afc2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b9850f02e9ec4069aa836d9ed11aa019": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ba93d6edfed5434397d869ba2bd7aca5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_c10b310f7f6344b39c8d165c887d28f1", + "placeholder": "​", + "style": "IPY_MODEL_052b62b792fc4f1183ffb91f18d655c7", + "value": "" + } + }, + "bbb9442a3ec1463790bcf6533f3be905": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_16bd920f956f43b3921b79cec4d76d87", + "IPY_MODEL_c12bc29016124339a6c3cb11f21bfaf0", + "IPY_MODEL_a6b61668ad1146e78c2a9b2556b23703" + ], + "layout": "IPY_MODEL_14cc3d3e93c54226bc2f38a4745c1d23" + } + }, + "bcfc02b30e024b5c81856bcd9e191d07": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f4d6914361f44912b0d7ca1b16496606", + "placeholder": "​", + "style": "IPY_MODEL_b3c787a117ae42738c315c5cdabd508c", + "value": "Downloading: 100%" + } + }, + "be06bfcbe41040369d025626b8227b6b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": 
null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "be63efa842e7438ab1fc2f4eb2ceb55f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2f093aa6d36c4622bd09d1253bf6c3d4", + "placeholder": "​", + "style": "IPY_MODEL_c13b9d0a74f540d5ab6c01064c149a65", + "value": " 690/690 [00:00<00:00, 45.7kB/s]" + } + }, + "bebd64b52d24437dabeb5119235853d7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": 
null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c011f7d4aa7b4f66997d1c88804afacf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0b4387ee1a0d4f769b391dcbc4d5477c", + "placeholder": "​", + "style": "IPY_MODEL_b8da2402e8e245d2b5ff592f602f138e", + "value": " 252/252 [00:02<00:00, 113.35it/s]" + } + }, + "c026c0d5a95d487f8315821c616d1279": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c10b310f7f6344b39c8d165c887d28f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c11880f9dbf24e7b97a4a6e6c45d27bd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9b783cfaae714956ad6c0ee4bc7f7e23", + "IPY_MODEL_3e71940409664a86a2be2c0b86b10d64", + "IPY_MODEL_820351c564124d7d871dc17681b747e9" + ], + "layout": "IPY_MODEL_432654cc3aff41019a5df2f5dbcab6b3" + } + }, + "c12bc29016124339a6c3cb11f21bfaf0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9cef8cafa31a47fd9f7410478b5415e8", + "max": 3, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2e4deb55d7b74e2fa09c9f925fd2e751", + "value": 3 + } + }, + "c13b9d0a74f540d5ab6c01064c149a65": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c13ea0c47f6e48f0a804fa9f2f5ce544": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c1f28cff3e064e46a8c9bdd0d705a099": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c41c679d7f0341f3839b9abc5329ec08": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c48e02dc87d74d50ba6de4aefeb1441e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9ac85729093c41c1b2a0fef5191b8ab5", + "placeholder": "​", + "style": "IPY_MODEL_d774f350171b4c3fb6be768dbf11c726", + "value": " 3/3 [00:00<00:00, 125.78it/s]" + } + }, + "c4a54f5262014b77964d42429d5e43bf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + 
"left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "c54e39be37434e0d826d86041ef1de96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c59a11b4cdcd45a181e5417839a21245": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c5fecfc9041049e7834ae864bd54d59e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c65f5b1c90e24d47a060c0b362749b82": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c66c4c5330724c27966b24cfdc87dc3f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c7d383a298da44e491c81887632a89ae": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c9464f1ec9e64361b0e621c664514ada": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cad144a0ae5948f5896d12f83c044dfd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } 
+ }, + "cf31d13e5ccd4598bdf3353d1d914874": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e110392a48e446449f36eb31d42ba380", + "placeholder": "​", + "style": "IPY_MODEL_3eec352c0eea47e9b6e419785750df61", + "value": " 961k/961k [00:00<00:00, 2.26MB/s]" + } + }, + "cf540757bc79428bab170e157eb9381f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d0b05902a4db4abd8c10cab6178a59ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36156c14c8d544deab0ea65ac5ba7508", + "placeholder": "​", + "style": "IPY_MODEL_15d85f4fe15545bc85d3717d7af7bbb1", + "value": " 60.7k/60.7k [00:00<00:00, 685kB/s]" + } + }, + "d2186c7599164e528aa3d92f6999a9bb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d24e98b7ba64420588aeee6c0a7d787a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2cf4d1133a2421aa1ec980bdca4fecd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bcfc02b30e024b5c81856bcd9e191d07", + "IPY_MODEL_eaefc3e261a7453cb66905079b71f16a", + "IPY_MODEL_ac546920996e4ca081e438999ae19fc8" + ], + "layout": "IPY_MODEL_c54e39be37434e0d826d86041ef1de96" + } + }, + "d39695c4fadd4bdb93756645a3c3a1e0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e5d4a32f5b384dbaa0e99ff1b925836a", + "IPY_MODEL_b73b1442ea8c42da8342e62bea1a8d6f", + "IPY_MODEL_cf31d13e5ccd4598bdf3353d1d914874" + ], + "layout": "IPY_MODEL_2f87bde2c64b42bc90ed461265cfe1f1" + } + }, + "d5daacab713847ef98afd7a77fbd613c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d5fea5e89ebd4d298c169526eadeeb32": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "d774f350171b4c3fb6be768dbf11c726": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d8f11947b0d048489bdd649479c50b1f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d9c6dc4bccfb4e57a9ffcef2f9269172": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_48a8c0d648694b8abddf07ed12b68a93", + "IPY_MODEL_74d53a9e164f492785463bd1cdb376a7", + "IPY_MODEL_35c4bd29723b46ad94b098685953aefb" + ], + "layout": "IPY_MODEL_7d508b60c1f44aecb9e3f9711ecc0ce8" + } + }, + "dc03df51df3845edac3492bc3ee3391e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b73e9fcc9e634c2780cc880cc65cc678", + "placeholder": "​", + "style": "IPY_MODEL_57ed9e91eb2f45a18ee08f6989bec0a4", + "value": "100%" + } + }, + "dc43386a12c642cf93f434dd3f5cc04c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dcadb8f0088d43f6bc1a2cdc72cff730": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dcd3ba169df34a0395eb5d03405f8835": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_c65f5b1c90e24d47a060c0b362749b82", + "max": 60722, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_01bc8315e7cd4064a8653eae44cea192", + "value": 60722 + } + }, + "dd355cdd1594499bbccc8a7842a0ba93": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "de16a90176c24ff8aa76eb0af1281d22": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, 
+ "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e110392a48e446449f36eb31d42ba380": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e120b072886b4d4f94ef506ca3d6a605": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e145a20bac85423a8ac6a1841f7ff4d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e444beb58b7c4ad1aa94959ef0b5bd52": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e5d4a32f5b384dbaa0e99ff1b925836a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f305b7e403a647379268573a95d22f92", + "placeholder": "​", + "style": "IPY_MODEL_d5daacab713847ef98afd7a77fbd613c", + 
"value": "Downloading (…)CLIPModel/vocab.json: 100%" + } + }, + "e7da6c65cec140c3801fca68bcfdebcc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e831d4f1ad4d4f5f97bc9569a5294ca9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e8756ee3f2f64e01b2790714b6704584": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e8cc8dbe0d164a9eaec472ffe1351db9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e7da6c65cec140c3801fca68bcfdebcc", + "placeholder": "​", + "style": "IPY_MODEL_eccef3d9147242d9a1e42f4abd0ebe4c", + "value": " 0/? [00:00<?, ? 
tables/s]" + } + }, + "e8e295d7292245fb9f17413599d59960": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e94edd188d8942e6b7f01cb1b962ba46": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e97c5c6e969842468f53520dfba1ee8f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ea9794429bbc4ae98e258157eb4d317e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c5fecfc9041049e7834ae864bd54d59e", + "max": 116, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_857c37c3638b4c7d8ffaaaa9fe5932f0", + "value": 116 + } + }, + "eaefc3e261a7453cb66905079b71f16a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_edf41d1718984e7dbef2b65799935fd8", + "max": 1109, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_20dad493a55343efaabbc6f7c1966cf0", + "value": 1109 + } + }, + "eaf5ad9e9e3240799128ef8bbf0a195a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1ad0c7637293497a9a75bfa45c1a06e8", + "placeholder": "​", + "style": "IPY_MODEL_4fc202bfa55842b48e59d4a800026331", + "value": " 85.3k/85.3k [00:00<00:00, 343kB/s]" + } + }, + "eb9b58dab8044074b1cac3e39f26e561": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0987a64b9cea4850998e78d76d31c3b8", + "max": 85317, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fa0573f2ebd84a5ba5ebdc0452362887", + "value": 85317 + } + }, + "ebf96889240d44f0b5a16fd945090213": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4ae8401caf984baf9fc132490f11646a", + "max": 252, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e145a20bac85423a8ac6a1841f7ff4d7", + "value": 252 + } + }, + "ec25ea55d62e4c0a9443d91d015bdbd6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ecc225aab6804d97b1cdd383621c9695": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": 
{ + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eccef3d9147242d9a1e42f4abd0ebe4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ed19b6e281f145af9b6df5eb783e9a95": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": 
"", + "children": [ + "IPY_MODEL_49ddd099c0ed4535b2f3da0e1dc5f44b", + "IPY_MODEL_12da828a56fd47fbac3881a78ae3b40c", + "IPY_MODEL_be63efa842e7438ab1fc2f4eb2ceb55f" + ], + "layout": "IPY_MODEL_435b2d9a31af4b479bca791932a9315f" + } + }, + "edf41d1718984e7dbef2b65799935fd8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f2cdef43d919412698f85ea03ce79e99": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f305b7e403a647379268573a95d22f92": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"f41cf420defb4b109f836fecea8e1d15": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "f4d6914361f44912b0d7ca1b16496606": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f6e1dadaed924874a4c12e4759921e36": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7d2ce646a4c42ed893383a4f75af2d6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8916e9e4c5c4393b0f2c2f77d055757": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_707c06347f5444e185ca2bc7c07eecfa", + "max": 252, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_77131a41265342a28468664ad067d4c8", + "value": 252 + } + }, + "f8d66c452a6d4347b9a8247ad135fd35": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", 
+ "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3c8aa1d02f1541bdb59b0a8dff27148f", + "IPY_MODEL_88677a526eae40a1b5e0b5e9e7a25218", + "IPY_MODEL_eaf5ad9e9e3240799128ef8bbf0a195a" + ], + "layout": "IPY_MODEL_48f63d3af7604e8abe4ca94dbae63f48" + } + }, + "fa0573f2ebd84a5ba5ebdc0452362887": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fadb08eafb084a4e91f8406055beb502": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + 
"overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fcfda905fd4448b3940b04f4136c53a4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd56fa76c93c48b2bc179cb0a7bffac6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fe63dbf326194c6c813327bd643a85cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "feef17ea90ae4105b2ffb56841ed2adb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + 
"left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "ffbfe6debabc4f5c8cad8383d596946e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-weaksupervision.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-weaksupervision.ipynb index 2de77be714..773bd933cf 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-weaksupervision.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-weaksupervision.ipynb @@ -155,14 +155,12 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -171,6 +169,7 @@ 
{ "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -181,7 +180,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -215,6 +214,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -225,14 +225,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -1651,7 +1655,7 @@ " 0.6,\n", " 0.6,\n", " 0.8,\n", - "]\n" + "]" ] }, { @@ -1783,7 +1787,7 @@ "\n", " return metrics.classification_report(\n", " y_test, predicted, target_names=[k for k in label2int.keys() if k]\n", - " )\n" + " )" ] }, { @@ -1829,7 +1833,7 @@ } ], "source": [ - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -1905,7 +1909,7 @@ " return transitions_df, text\n", "\n", "\n", - "transitions, text = get_transitions(weak_labels, 15)\n" + "transitions, text = get_transitions(weak_labels, 15)" ] }, { @@ -2176,7 +2180,7 @@ } ], "source": [ - "transitions.transpose()\n" + "transitions.transpose()" ] }, { @@ -2208,7 +2212,7 @@ "thresholds = [0.8] * len(rules)\n", "\n", "# As we have already generated the index in our first call, we just need to provide the thresholds.\n", - "weak_labels.extend_matrix(thresholds)\n" + "weak_labels.extend_matrix(thresholds)" ] }, { @@ -2475,7 +2479,7 @@ "summary = summary.rename(columns={\"index\": \"rule\"})\n", "summary = summary.sort_values(by=\"overlaps\", ascending=True)[[\"rule\", \"overlaps\"]]\n", "summary = summary.reset_index()\n", - "summary\n" + "summary" ] }, { @@ -2521,7 +2525,7 @@ "weak_labels.extend_matrix(thresholds)\n", "label_model = Snorkel(weak_labels)\n", "label_model.fit(lr=0.002, n_epochs=10, progress_bar=False)\n", - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -2561,7 +2565,7 @@ " len(weak_labels.annotation()),\n", " )\n", " coverage = sup / n\n", - " return 2 * acc * coverage / (acc + coverage)\n" + " return 2 * acc * coverage / (acc + coverage)" ] }, { @@ -2608,7 +2612,7 @@ "source": [ "import copy\n", "from tqdm.auto import tqdm\n", - "import numpy as np \n", + "import numpy as np\n", "\n", "ths_range = np.arange(1, 0.3, -0.1)\n", "n_ths = len(weak_labels.rules)\n", @@ -2622,7 +2626,7 @@ " 
acc = train_eval_labelmodel(thresholds)\n", " if acc > best_acc:\n", " best_acc = acc\n", - " best_thresholds = thresholds.copy()\n" + " best_thresholds = thresholds.copy()" ] }, { @@ -2650,7 +2654,7 @@ } ], "source": [ - "np.array(best_thresholds)\n" + "np.array(best_thresholds)" ] }, { @@ -2687,7 +2691,7 @@ "weak_labels.extend_matrix(best_thresholds)\n", "label_model = Snorkel(weak_labels)\n", "label_model.fit(lr=0.002, n_epochs=10, progress_bar=False)\n", - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -2743,7 +2747,7 @@ " y=y_test_for_grid_search,\n", " )\n", "\n", - " return accuracy\n" + " return accuracy" ] }, { @@ -2802,7 +2806,7 @@ " acc = train_eval_downstream(thresholds)\n", " if acc > best_acc:\n", " best_acc = acc\n", - " best_thresholds = thresholds.copy()\n" + " best_thresholds = thresholds.copy()" ] }, { @@ -2830,7 +2834,7 @@ } ], "source": [ - "np.array(best_thresholds)\n" + "np.array(best_thresholds)" ] }, { @@ -2867,7 +2871,7 @@ "weak_labels.extend_matrix(best_thresholds)\n", "label_model = Snorkel(weak_labels)\n", "label_model.fit(lr=0.002, n_epochs=10, progress_bar=False)\n", - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -2991,7 +2995,7 @@ "ax[0].add_artist(legend1)\n", "\n", "fig.tight_layout()\n", - "plt.savefig(\"extend_weak_labels.png\", facecolor=\"white\", transparent=False)\n" + "plt.savefig(\"extend_weak_labels.png\", facecolor=\"white\", transparent=False)" ] } ], diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-setfit-zeroshot.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-setfit-zeroshot.ipynb index 117b27ed0d..5fd503adf5 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-setfit-zeroshot.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-setfit-zeroshot.ipynb @@ -136,10 
+136,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init( \n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -162,7 +159,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -212,9 +209,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -238,16 +238,16 @@ "outputs": [], "source": [ "# Define fast version of sentence transformers, change to cuda if available\n", - "encoder = SentenceTransformer(\"all-MiniLM-L6-v2\", device=\"cuda\") \n", + "encoder = SentenceTransformer(\"all-MiniLM-L6-v2\", device=\"cuda\")\n", "\n", "# Load dataset with banking\n", "dataset = load_dataset(\"banking77\", split=\"test\")\n", "\n", "# Encode text field using batched computation\n", "dataset = dataset.map(\n", - " lambda batch: {\"vectors\": encoder.encode(batch[\"text\"])}, \n", - " batch_size=32, \n", - " batched=True\n", + " lambda batch: {\"vectors\": encoder.encode(batch[\"text\"])},\n", + " batch_size=32,\n", + " batched=True,\n", ")\n", "\n", "# Removes the original labels because you'll be labeling from scratch\n", @@ -463,18 +463,22 @@ }, "outputs": [], "source": [ - "labels = [\"change details\", \"card\", \"atm\", \"top up\", \"balance\", \"transfer\", \"exchange rate\", \"pin\"]\n", + "labels = [\n", + " \"change details\",\n", + " \"card\",\n", + " \"atm\",\n", + " \"top up\",\n", + " \"balance\",\n", + " \"transfer\",\n", + " \"exchange rate\",\n", + " \"pin\",\n", + "]\n", "train_dataset = get_templated_dataset(\n", - " candidate_labels=labels, \n", - " sample_size=8, \n", - " template=\"The customer request is about {}\"\n", + " candidate_labels=labels, sample_size=8, template=\"The customer request is about {}\"\n", ")\n", "\n", "model = SetFitModel.from_pretrained(\"all-MiniLM-L6-v2\")\n", - "trainer = SetFitTrainer(\n", - " model=model,\n", - " train_dataset=train_dataset\n", - ")\n", + "trainer = SetFitTrainer(model=model, train_dataset=train_dataset)\n", "trainer.train()" ] }, @@ -501,7 +505,10 @@ " for pred in probas:\n", " yield [{\"label\": label, \"score\": score} for label, score in zip(labels, pred)]\n", "\n", - "dataset = dataset.map(lambda batch: {\"prediction\": list(get_predictions(batch[\"text\"]))}, batched=True)" + "\n", + "dataset = 
dataset.map(\n", + " lambda batch: {\"prediction\": list(get_predictions(batch[\"text\"]))}, batched=True\n", + ")" ] }, { @@ -671,7 +678,7 @@ "xaxis": "x3", "y": [ 1, - 0.9354838709677419, + 0.935483870967742, 0.9666666666666666, 0, 0, @@ -681,7 +688,7 @@ 0.8387096774193549, 1, 0.9545454545454546, - 0.9767441860465117, + 0.9767441860465116, 0.9444444444444444, 0.85, 0.8947368421052632, @@ -690,10 +697,10 @@ 0.7848101265822784, 0.95, 0.9047619047619048, - 0.9268292682926829, + 0.9268292682926828, 0.8333333333333334, 1, - 0.9090909090909091 + 0.9090909090909092 ], "yaxis": "y3" } @@ -1593,7 +1600,7 @@ ], "range": [ 0, - 0.9083622762489981 + 0.908362276248998 ], "type": "linear" }, @@ -1703,7 +1710,7 @@ " eval_dataset=ds[\"test\"],\n", " loss_class=CosineSimilarityLoss,\n", " batch_size=16,\n", - " num_iterations=20, \n", + " num_iterations=20,\n", ")\n", "\n", "# Train and evaluate\n", diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-sklearn-weaksupervision.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-sklearn-weaksupervision.ipynb index 161d3fd216..b8644a26c6 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-sklearn-weaksupervision.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-sklearn-weaksupervision.ipynb @@ -132,14 +132,12 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -148,6 +146,7 @@ { "cell_type": "code", "execution_count": null, + "id": 
"acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -158,7 +157,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -183,11 +182,19 @@ "outputs": [], "source": [ "from datasets import load_dataset\n", - "from argilla.labeling.text_classification import Rule, WeakMultiLabels, add_rules, delete_rules, update_rules, MajorityVoter" + "from argilla.labeling.text_classification import (\n", + " Rule,\n", + " WeakMultiLabels,\n", + " add_rules,\n", + " delete_rules,\n", + " update_rules,\n", + " MajorityVoter,\n", + ")" ] }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -198,14 +205,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -300,7 +311,7 @@ " Rule('\"can you\"', \"curiosity\"),\n", " Rule('\"would you\"', \"curiosity\"),\n", " Rule('\"do you\"', [\"curiosity\", \"admiration\"]),\n", - " Rule('\"great\"', [\"annoyance\"])\n", + " Rule('\"great\"', [\"annoyance\"]),\n", "]" ] }, @@ -1154,7 +1165,8 @@ "source": [ "rules_to_delete = [\n", " Rule(\"joking\", [\"optimism\", \"admiration\"]),\n", - " Rule('\"do you\"', [\"curiosity\", \"admiration\"])]\n", + " Rule('\"do you\"', [\"curiosity\", \"admiration\"]),\n", + "]\n", "\n", "delete_rules(dataset=\"go_emotions\", rules=rules_to_delete)\n", "\n", @@ -1187,9 +1199,7 @@ "metadata": {}, "outputs": [], "source": [ - "rules_to_update = [\n", - " Rule('\"great\"', [\"admiration\"]),\n", - " Rule(\"yeah\", \"approval\")]\n", + "rules_to_update = [Rule('\"great\"', [\"admiration\"]), Rule(\"yeah\", \"approval\")]\n", "\n", "update_rules(dataset=\"go_emotions\", rules=rules_to_update)" ] @@ -3516,7 +3526,7 @@ "metadata": {}, "outputs": [], "source": [ - "rule = Rule(\"risk\", \"Statistics\")" + "rule = Rule(\"risk\", \"Statistics\")" ] }, { @@ -4407,7 +4417,7 @@ "idx_multi = df.labels.map(lambda x: len(x) > 1)\n", "df[\"is_single\"] = df.labels.map(lambda x: 0 if len(x) > 1 else 1)\n", "df[idx_multi].labels.map(lambda x: [label_freq.append(int(l)) for l in x])\n", - "pd.Series(label_freq).value_counts()\n" + "pd.Series(label_freq).value_counts()" ] }, { diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-snorkel-weaksupervision.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-snorkel-weaksupervision.ipynb index 621b1fcf74..eecc187848 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-snorkel-weaksupervision.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-snorkel-weaksupervision.ipynb @@ -133,14 +133,12 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if 
you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -149,6 +147,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -159,7 +158,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -194,6 +193,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -204,14 +204,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -979,7 +983,7 @@ "source": [ "# quick look at our training data with the weak labels from our label model\n", "with pd.option_context(\"display.max_colwidth\", None):\n", - " display(pd.DataFrame({\"text\": X_train, \"label\": y_train}))\n" + " display(pd.DataFrame({\"text\": X_train, \"label\": y_train}))" ] }, { @@ -1048,7 +1052,7 @@ "\n", "# extract text and labels\n", "X_test = [rec.text for rec in test_ds]\n", - "y_test = [label2int[rec.annotation] for rec in test_ds]\n" + "y_test = [label2int[rec.annotation] for rec in test_ds]" ] }, { @@ -1072,7 +1076,7 @@ " y=y_test,\n", ")\n", "\n", - "print(f\"Test accuracy: {accuracy}\")\n" + "print(f\"Test accuracy: {accuracy}\")" ] }, { diff --git a/docs/_source/tutorials/notebooks/labelling-tokenclassification-basics.ipynb b/docs/_source/tutorials/notebooks/labelling-tokenclassification-basics.ipynb index 925d650771..6d80c40b84 100644 --- a/docs/_source/tutorials/notebooks/labelling-tokenclassification-basics.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-tokenclassification-basics.ipynb @@ -124,10 +124,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -150,7 +147,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -173,9 +170,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " 
tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -224,7 +224,7 @@ "from datasets import load_dataset\n", "\n", "ds = rg.DatasetForTokenClassification.from_datasets(\n", - " dataset=load_dataset('argilla/gutenberg_spacy-ner', split='train')\n", + " dataset=load_dataset(\"argilla/gutenberg_spacy-ner\", split=\"train\")\n", ")" ] }, @@ -278,15 +278,15 @@ "source": [ "import httpx\n", "\n", - "rg_client= rg.active_client().client\n", + "rg_client = rg.active_client().client\n", "auth_headers = {\"X-Argilla-API-Key\": rg_client.token}\n", - "http=httpx.Client(base_url=rg_client.base_url, headers=auth_headers)\n", + "http = httpx.Client(base_url=rg_client.base_url, headers=auth_headers)\n", "\n", "# make a request using our Argilla Client\n", "users = http.get(\"/api/users\").json()\n", "\n", "# optional: filter users to get only those with annotator role\n", - "users = [u for u in users if u['role']=='annotator']" + "users = [u for u in users if u[\"role\"] == \"annotator\"]" ] }, { @@ -328,7 +328,7 @@ "\n", "# divide your dataset into chunks of the same length as the users list and make the assignments\n", "n = len(users)\n", - "chunked_records = [ds[i:i + n] for i in range(0, len(ds), n)]\n", + "chunked_records = [ds[i : i + n] for i in range(0, len(ds), n)]\n", "for chunk in chunked_records:\n", " for idx, record in enumerate(chunk):\n", " assignments[users[idx].username].append(record)" @@ -361,15 +361,15 @@ "assigned_records = []\n", "for user, records in assignments.items():\n", " for record in records:\n", - " record.metadata['user'] = user\n", + " record.metadata[\"user\"] = user\n", " assigned_records.append(record)\n", "\n", "# log the records in Argilla\n", "rg.log(\n", " 
records=assigned_records,\n", - " workspace='recognai',\n", - " name='gutenberg_spacy-ner',\n", - " tags={'with assignments': True}\n", + " workspace=\"recognai\",\n", + " name=\"gutenberg_spacy-ner\",\n", + " tags={\"with assignments\": True},\n", ")" ] }, @@ -393,11 +393,7 @@ "source": [ "# loop through the dictionary and log the dataset for each user\n", "for user, records in assignments.items():\n", - " rg.log(\n", - " records=records,\n", - " workspace=user,\n", - " name='gutenberg_spacy-ner'\n", - " )" + " rg.log(records=records, workspace=user, name=\"gutenberg_spacy-ner\")" ] }, { diff --git a/docs/_source/tutorials/notebooks/labelling-tokenclassification-deletelabels.ipynb b/docs/_source/tutorials/notebooks/labelling-tokenclassification-deletelabels.ipynb index 6f55b74d70..650ebde507 100644 --- a/docs/_source/tutorials/notebooks/labelling-tokenclassification-deletelabels.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-tokenclassification-deletelabels.ipynb @@ -80,10 +80,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -106,7 +103,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -129,9 +126,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current 
installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -297,7 +297,9 @@ "outputs": [], "source": [ "# get all records with the old label in the annotations or predictions\n", - "records = rg.load(dataset_name, query=f\"annotated_as:{old_label} OR predicted_as:{old_label}\")\n", + "records = rg.load(\n", + " dataset_name, query=f\"annotated_as:{old_label} OR predicted_as:{old_label}\"\n", + ")\n", "len(records)" ] }, @@ -316,10 +318,9 @@ "outputs": [], "source": [ "def cleaning_function(labels, old_label, new_label):\n", - "\n", " # replaces / removes string labels (e.g. TextClassification)\n", " if isinstance(labels, str):\n", - " if labels==old_label:\n", + " if labels == old_label:\n", " labels = new_label\n", "\n", " elif isinstance(labels, list):\n", @@ -329,12 +330,14 @@ " if new_label == None:\n", " labels.remove(old_label)\n", " else:\n", - " labels = [new_label if label == old_label else label for label in labels]\n", + " labels = [\n", + " new_label if label == old_label else label for label in labels\n", + " ]\n", "\n", " # replaces / removes lables in a list of tuples (e.g. 
Predictions, TokenClassification)\n", " elif isinstance(labels[0], tuple):\n", - " for ix,label in enumerate(labels):\n", - " if label[0]==old_label:\n", + " for ix, label in enumerate(labels):\n", + " if label[0] == old_label:\n", " if new_label == None:\n", " labels.remove(label)\n", " else:\n", @@ -356,7 +359,7 @@ " if record.prediction:\n", " record.prediction = cleaning_function(record.prediction, old_label, new_label)\n", " if record.annotation:\n", - " record.annotation = cleaning_function(record.annotation, old_label, new_label) \n", + " record.annotation = cleaning_function(record.annotation, old_label, new_label)\n", " record.status = \"Default\"" ] }, diff --git a/docs/_source/tutorials/notebooks/labelling-tokenclassification-flair-fewshot.ipynb b/docs/_source/tutorials/notebooks/labelling-tokenclassification-flair-fewshot.ipynb index cf4c625a64..aa5db0300c 100644 --- a/docs/_source/tutorials/notebooks/labelling-tokenclassification-flair-fewshot.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-tokenclassification-flair-fewshot.ipynb @@ -134,15 +134,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -151,6 +148,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -162,7 +160,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace 
workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -194,6 +192,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -204,14 +203,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -279,7 +282,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Wrap our tokens in a flair Sentence\n", "sentence = Sentence(\" \".join(dataset[0][\"tokens\"]))\n", "\n", diff --git a/docs/_source/tutorials/notebooks/labelling-tokenclassification-skweak-weaksupervision.ipynb b/docs/_source/tutorials/notebooks/labelling-tokenclassification-skweak-weaksupervision.ipynb index bcc02f366a..11bb584d18 100644 --- a/docs/_source/tutorials/notebooks/labelling-tokenclassification-skweak-weaksupervision.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-tokenclassification-skweak-weaksupervision.ipynb @@ -145,15 +145,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -162,6 +159,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -173,7 +171,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -222,6 +220,7 @@ }, { "cell_type": "markdown", + "id": 
"9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -232,14 +231,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -356,7 +359,6 @@ }, "outputs": [], "source": [ - "\n", "conll2003 = load_dataset(\"conll2003\")" ] }, @@ -533,7 +535,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "sports_results_annotator = FunctionAnnotator(\"sports_results\", sports_results_detector)\n", "sports_match_annotator = FunctionAnnotator(\"sports_match\", sports_match_detector)" ] @@ -619,7 +620,7 @@ " yield start, end, label\n", "\n", " start = None\n", - " end = None\n" + " end = None" ] }, { @@ -683,7 +684,7 @@ "for keyword in title_ending:\n", " func = partial(title_detector, keyword=keyword, reverse=True)\n", " annotator = FunctionAnnotator(keyword + \" (end)\", func)\n", - " rule_based_annotator.add_annotator(annotator)\n" + " rule_based_annotator.add_annotator(annotator)" ] }, { @@ -853,7 +854,7 @@ " )\n", "\n", "\n", - "rg.log(records=spans_logger(dev_docs), name=\"conll_2003_dev_spans\")\n" + "rg.log(records=spans_logger(dev_docs), name=\"conll_2003_dev_spans\")" ] }, { @@ -1090,6 +1091,7 @@ " dev_docs, gold_span_name=\"gold\", gold_labels=[\"ORG\", \"MISC\", \"PER\", \"LOC\", \"O\"]\n", ")\n", "\n", + "\n", "def scores_to_df(scores):\n", " for annotator, label_dict in scores.items():\n", " for label, metrics_dict in label_dict.items():\n", @@ -1325,7 +1327,7 @@ "\n", "pd.DataFrame(\n", " [{k: v for k, v in scores.items() if k in [\"ents_p\", 
\"ents_r\", \"ents_f\"]}]\n", - ").round(3)\n" + ").round(3)" ] }, { diff --git a/docs/_source/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.ipynb b/docs/_source/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.ipynb index acf28ca07c..1936a6576c 100644 --- a/docs/_source/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.ipynb @@ -114,11 +114,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -142,7 +138,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -186,9 +182,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -344,7 +343,7 @@ "source": [ "dataset = load_dataset(\"gutenberg_time\", split=\"train\", streaming=True)\n", "\n", - "# Let's have a look at the first 5 examples of the train set. 
\n", + "# Let's have a look at the first 5 examples of the train set.\n", "pd.DataFrame(dataset.take(5))" ] }, @@ -395,7 +394,7 @@ " prediction_agent=\"en_core_web_trf\",\n", " )\n", " )\n", - " \n", + "\n", "rg.log(records=records, name=\"gutenberg_spacy_ner\")" ] }, @@ -450,7 +449,7 @@ " prediction_agent=\"en_core_web_sm\",\n", " )\n", " )\n", - " \n", + "\n", "rg.log(records=records, name=\"gutenberg_spacy_ner\")" ] }, @@ -512,10 +511,10 @@ " text=text,\n", " tokens=tokens,\n", " prediction=entities,\n", - " prediction_agent=\"en_core_web_sm\"\n", + " prediction_agent=\"en_core_web_sm\",\n", " )\n", " )\n", - " \n", + "\n", "rg.log(records=records, name=\"imdb_spacy_ner\")" ] }, diff --git a/docs/_source/tutorials/notebooks/monitoring-textclassification-cleanlab-explainability.ipynb b/docs/_source/tutorials/notebooks/monitoring-textclassification-cleanlab-explainability.ipynb index b6a4cb8c41..95accda5a6 100644 --- a/docs/_source/tutorials/notebooks/monitoring-textclassification-cleanlab-explainability.ipynb +++ b/docs/_source/tutorials/notebooks/monitoring-textclassification-cleanlab-explainability.ipynb @@ -114,15 +114,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -131,6 +128,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -142,7 +140,7 @@ "# # Replace 
api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -171,11 +169,12 @@ "from sklearn.naive_bayes import MultinomialNB\n", "from sklearn.pipeline import Pipeline\n", "\n", - "from argilla.labeling.text_classification import find_label_errors\n" + "from argilla.labeling.text_classification import find_label_errors" ] }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -186,14 +185,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -277,7 +280,7 @@ "classifier = Pipeline([(\"vect\", CountVectorizer()), (\"clf\", MultinomialNB())])\n", "\n", "# Fit the classifier\n", - "classifier.fit(X=ds_train[\"text\"], y=ds_train[\"label\"])\n" + "classifier.fit(X=ds_train[\"text\"], y=ds_train[\"label\"])" ] }, { @@ -411,7 +414,7 @@ "outputs": [], "source": [ "# Uncover label errors in the Argilla web app\n", - "rg.log(records_with_label_error, \"label_errors\")\n" + "rg.log(records_with_label_error, \"label_errors\")" ] }, { @@ -479,7 +482,7 @@ " cv=int(len(ds_train) / len(ds_test)),\n", " method=\"predict_proba\",\n", " n_jobs=-1,\n", - ")\n" + ")" ] }, { diff --git a/docs/_source/tutorials/notebooks/monitoring-textclassification-setfit-explainability.ipynb b/docs/_source/tutorials/notebooks/monitoring-textclassification-setfit-explainability.ipynb index b1b23471e6..2b2a54fa7b 100644 --- a/docs/_source/tutorials/notebooks/monitoring-textclassification-setfit-explainability.ipynb +++ b/docs/_source/tutorials/notebooks/monitoring-textclassification-setfit-explainability.ipynb @@ -1,2412 +1,2421 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "b5Vbz_hkuHAo" - }, - "source": [ - "# 🥇 Compare Text Classification Models\n", - "\n", - "In this tutorial, you'll learn to make text classification on a dataset using two different models, upload models predictions in your Argilla workspace and compare models by computing the F1 score for each model. 
It will walk you through these steps:\n", - "- 💾 Load the dataset you want to use.\n", - "- 💻 Compute predictions with a zero-shot classification model.\n", - "- 🔄 Convert model output to Argilla format and upload it to Argilla workspace.\n", - "- 💻 Compute predictions with zero-shot SetFit model.\n", - "- 🧪 Compare models predictions with F1 score\n" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "b5Vbz_hkuHAo" + }, + "source": [ + "# 🥇 Compare Text Classification Models\n", + "\n", + "In this tutorial, you'll learn to make text classification on a dataset using two different models, upload models predictions in your Argilla workspace and compare models by computing the F1 score for each model. It will walk you through these steps:\n", + "- 💾 Load the dataset you want to use.\n", + "- 💻 Compute predictions with a zero-shot classification model.\n", + "- 🔄 Convert model output to Argilla format and upload it to Argilla workspace.\n", + "- 💻 Compute predictions with zero-shot SetFit model.\n", + "- 🧪 Compare models predictions with F1 score\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4AbgSHtx1Hbh" + }, + "source": [ + "## Introduction\n", + "When working on Text Classification, you may want to compare two models to decide which one to use.\n", + "For this, we compute the F1 score on train models using their annotations as the true text class.\n", + "The F1 score can be interpreted as a harmonic mean of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. 
(more info in this [documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html))\n", + "\n", + "*Argilla* allows you to deploy and monitor any model you like, but in this tutorial, we will focus on the two most common frameworks in the NLP space: [transformers](https://huggingface.co/docs/transformers) and [SetFit](https://github.com/huggingface/setfit). Let's get started!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qX1M5cMP1XyX" + }, + "source": [ + "## Running Argilla\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. There are two main options for deploying and running Argilla:\n", + "\n", + "\n", + "**Deploy Argilla on Hugging Face Spaces**: If you want to run tutorials with external notebooks (e.g., Google Colab) and you have an account on Hugging Face, you can deploy Argilla on Spaces with a few clicks:\n", + "\n", + "[![deploy on spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-lg.svg)](https://huggingface.co/new-space?template=argilla/argilla-template-space)\n", + "\n", + "For details about configuring your deployment, check the [official Hugging Face Hub guide](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla).\n", + "\n", + "\n", + "**Launch Argilla using Argilla's quickstart Docker image**: This is the recommended option if you want [Argilla running on your local machine](../../getting_started/quickstart.ipynb). Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", + "\n", + "For more information on deployment options, please check the Deployment section of the documentation.\n", + "\n", + "
\n", + "\n", + "Tip\n", + " \n", + "This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter Notebook tool of your choice.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_JeXXsL1w4a" + }, + "source": [ + "## Setup\n", + "\n", + "To complete this tutorial, you will need to install the Argilla client and a few third-party libraries using `pip`:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "SbyuZvarlToD" + }, + "outputs": [], + "source": [ + "%pip install transformers argilla datasets torch setfit -qqqqqqq" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XMxT5_tK346_" + }, + "source": [ + "The imports needed:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "W4_oh-TmnOVm" + }, + "outputs": [], + "source": [ + "import argilla as rg\n", + "from datasets import load_dataset\n", + "from transformers import pipeline\n", + "from argilla.metrics.text_classification import f1\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "shy57P-F48iH" + }, + "source": [ + "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "klxDkoErnaPI" + }, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# Replace api_key if you configured a custom API key\n", + "# Replace workspace with the name of your workspace\n", + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# import os\n", + "# os.environ['HF_TOKEN'] 
= \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# # Replace workspace with the name of your workspace\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"owner.apikey\",\n", + "# workspace=\"admin\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MQibTErO5De0" + }, + "source": [ + "For this tutorial the HugginFace [ag_news](https://huggingface.co/datasets/ag_news) dataset is chosen:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "4AbgSHtx1Hbh" - }, - "source": [ - "## Introduction\n", - "When working on Text Classification, you may want to compare two models to decide which one to use.\n", - "For this, we compute the F1 score on train models using their annotations as the true text class.\n", - "The F1 score can be interpreted as a harmonic mean of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. (more info in this [documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html))\n", - "\n", - "*Argilla* allows you to deploy and monitor any model you like, but in this tutorial, we will focus on the two most common frameworks in the NLP space: [transformers](https://huggingface.co/docs/transformers) and [SetFit](https://github.com/huggingface/setfit). Let's get started!" 
- ] + "id": "teaiKrR1ng15", + "outputId": "381f5715-9f37-46b8-b45f-ec767c906fc7" + }, + "outputs": [], + "source": [ + "news_dataset = load_dataset(\"ag_news\", split=\"test\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sA2WK_px6pa1" + }, + "source": [ + "This dataset is composed of two columns, one is the text of the news article and the other one is the label associated with this text article:\n", + "\n", + "*For this tutorial, we will consider the label as the annotation of the text*\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "blvo7FZz8ccU" + }, + "source": [ + "We transform our dataset in order to create an argilla TextClassificationDataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "metadata": { - "id": "qX1M5cMP1XyX" - }, - "source": [ - "## Running Argilla\n", - "\n", - "For this tutorial, you will need to have an Argilla server running. There are two main options for deploying and running Argilla:\n", - "\n", - "\n", - "**Deploy Argilla on Hugging Face Spaces**: If you want to run tutorials with external notebooks (e.g., Google Colab) and you have an account on Hugging Face, you can deploy Argilla on Spaces with a few clicks:\n", - "\n", - "[![deploy on spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-lg.svg)](https://huggingface.co/new-space?template=argilla/argilla-template-space)\n", - "\n", - "For details about configuring your deployment, check the [official Hugging Face Hub guide](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla).\n", - "\n", - "\n", - "**Launch Argilla using Argilla's quickstart Docker image**: This is the recommended option if you want [Argilla running on your local machine](../../getting_started/quickstart.ipynb). 
Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", - "\n", - "For more information on deployment options, please check the Deployment section of the documentation.\n", - "\n", - "
\n", - "\n", - "Tip\n", - " \n", - "This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter Notebook tool of your choice.\n", - "
" - ] + "id": "Ttt0aUJ7pYzz", + "outputId": "15f7798d-6ad7-45f5-82a6-225088ee0076" + }, + "outputs": [], + "source": [ + "int_to_label = {\n", + " 0: \"World\",\n", + " 1: \"Sports\",\n", + " 2: \"Business\",\n", + " 3: \"Sci/Tech\",\n", + "}\n", + "\n", + "news_dataset = news_dataset.map(\n", + " lambda row: {\"prediction\": [{\"label\": int_to_label[row[\"label\"]], \"score\": 1}]}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "metadata": { - "id": "S_JeXXsL1w4a" - }, - "source": [ - "## Setup\n", - "\n", - "To complete this tutorial, you will need to install the Argilla client and a few third-party libraries using `pip`:" - ] + "id": "u687F37SoFn8", + "outputId": "3f111e4d-97c4-41fe-b595-80b4ccda4520" + }, + "outputs": [], + "source": [ + "ds_record = rg.read_datasets(dataset=news_dataset, task=\"TextClassification\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TEyjYZHAIV9h" + }, + "source": [ + "## Make Zero Shot Text Classification predictions using transformers\n", + "\n", + "On HugginFace we choose the model `cross-encoder/nli-distilroberta-base` that is trained to perform zero-shot classification.\n", + "We create a pipeline with this model and then perform prediction.\n", + "\n", + "*note: `device=0` in pipeline() permits to use GPU if you do not have a GPU available delete this parameter*\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "UW4am9A7n4mI" + }, + "outputs": [], + "source": [ + "labels = [\"Sports\", \"Sci/Tech\", \"Business\", \"World\"]\n", + "\n", + "pipe = pipeline(\n", + " \"zero-shot-classification\", model=\"cross-encoder/nli-distilroberta-base\", device=0\n", + ")\n", + "result = []\n", + "with pipe.device_placement():\n", + " result = pipe(\n", + " [data.text for data in ds_record],\n", + " candidate_labels=labels,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BnT8GZmWKl7S" + }, + "source": [ + "Now that predictions are successfully made with the zero-shot model we can transform it into a list of argilla TextClassificationRecord and upload it to our argilla client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85, + "referenced_widgets": [ + "03352da397fc457fa70dd5e489cc4844", + "9424d0b9953b43638b5f0623ff225d79", + "4e8ec2dc3b564cae937741ac7d47372a", + "58a2528e6a7646089e77174b5ef6d720", + "301a91403585451cad67df96ada457b5", + "b2266a02a7154eb9a7a5ea36734a3902", + "cfb6680ece1946a990572948ecc15941", + "d851b02df22d40528579462ac2649e9c", + "bc75ab7805a041ad91ffcc6cf878212d", + "99ee702176d6453d9df40b1278a3dc0d", + "09a5640c9f9f4d1cb2d320e0f7577469" + ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": 
{ - "id": "SbyuZvarlToD" - }, - "outputs": [], - "source": [ - "%pip install transformers argilla datasets torch setfit -qqqqqqq" - ] + "id": "ft7e8RApyW0u", + "outputId": "b7414e09-d2a5-4c8e-9dd2-364b53f8ee70" + }, + "outputs": [], + "source": [ + "zero_shot_news_dataset = [\n", + " rg.TextClassificationRecord(\n", + " text=res[\"sequence\"],\n", + " prediction=list(zip(res[\"labels\"], res[\"scores\"])),\n", + " annotation=record.prediction[0][0],\n", + " )\n", + " for res, record in zip(result, ds_record)\n", + "]\n", + "\n", + "rg.log(name=\"zero_shot_news_dataset\", records=zero_shot_news_dataset)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "T_weCa-YPAzF" + }, + "source": [ + "You can access the zero_shot_news_dataset in the Argilla UI:\n", + "\n", + "\"Model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z7GTj9I9K-mf" + }, + "source": [ + "Finally, we measure the performance of our model using the argilla f1 function that computes the F1 score:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 }, + "id": "1iShwCbj3n-l", + "outputId": "b02aa4e4-cdc9-4d07-e2e0-f7d794b45706" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "XMxT5_tK346_" - }, - "source": [ - "The imports needed:" + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "f1_zero_shot = f1(\"zero_shot_news_dataset\")\n", + "f1_zero_shot.visualize()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "l4gY2nuk2c0G" + }, + "source": [ + "## Make zero-shot text classification using a trained SetFit classifier" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The imports needed:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "n5UcoG5l3lRy" + }, + "outputs": [], + "source": [ + "from setfit import SetFitModel, SetFitTrainer, get_templated_dataset" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create a synthetic dataset of training examples using our dataset's labels" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "8DTm1BqI2dZp" + }, + "outputs": [], + "source": [ + "labels = [\"Sports\", \"Sci/Tech\", \"Business\", \"World\"]\n", + "train_dataset = get_templated_dataset(\n", + " candidate_labels=labels, sample_size=8, template=\"The news article is about {}\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We train a SetFitModel using the pretrained model 'all-MiniLM-L6-v2'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 244, + "referenced_widgets": [ + "e90626dee9ad4610a2d3b04c5aa7cfd8", + "f249c3c6f884440db96d66715cd48247", + "9f6b243e854d40de8339d5920ea9b2c8", + "981710df13494b958e9e3fb366bd53d4", + "269c0b2820b44ba7844f451dc6b7ce62", + "d4bbeddf2bf04fc88c02995657c58410", + "5ab6d3b8e01b4858bcd2ef38475265c0", + "cb9c3a0c0ba04bc6a062e696ec4616b7", + "c7ba9a5b20fc4af2b2a044585e93e5ec", + "0ed8954de1e0470a87bba037516159ce", + "ef745b03cfec4cd793b852d29185d042", + 
"f773cd8ad7df481e9e606e245c37c575", + "a9eb439d911346c1bc523d1719f96c72", + "03fc89e7ed73469f9641f0e27ff60920", + "371ebf2451ca4045aa8c7c29aa80b81f", + "ffc76bd466ed46518d5511ceb0cb9086", + "14e4c79dd65d4fafb02701e775873e2a", + "cf2c6b3aa42343a1b6ea765a17b133e5", + "65f152c3f69f4e0781bea0287d5f8f14", + "6fdf42751cd74adf857773c33e2d8e21", + "03cb83f82a494893bb8bb94ea588fb62", + "f0fab85f8e8d4d1ca46bfbe14d50fe57" + ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "W4_oh-TmnOVm" - }, - "outputs": [], - "source": [ - "import argilla as rg\n", - "from datasets import load_dataset\n", - "from transformers import pipeline\n", - "from argilla.metrics.text_classification import f1\n", - "import pandas as pd" - ] + "id": "YtLxfwJd7u5c", + "outputId": "1855f46b-e01c-4f39-99af-166d5e9b944c" + }, + "outputs": [], + "source": [ + "model = SetFitModel.from_pretrained(\"all-MiniLM-L6-v2\")\n", + "trainer = SetFitTrainer(model=model, train_dataset=train_dataset)\n", + "trainer.train()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are then able to compute the text classification: " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "ytsi4_No0_pd" + }, + "outputs": [], + "source": [ + "result = [\n", + " {\n", + " \"sequence\": data[\"text\"],\n", + " \"scores\": model.predict_proba([data[\"text\"]]).squeeze().numpy(),\n", + " \"labels\": labels,\n", + " }\n", + " for data in news_dataset\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can log the result dataset into Argilla and compute the F1 score:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85, + "referenced_widgets": [ + "9d7425ebb4854fc4b4c957a7a8805b0f", + "4adcf6345d2f48a1b8f912ba2813b701", + "273687d63b794ce3982fcc4b32dc40ac", + 
"ef39075183f74473bbfcfb08af09c484", + "d91f02b4b9aa43ebaa6be94fb5465ada", + "50581d62125c4153b6cc9d3f43a3161e", + "4b4f86bb0983441288adee5e7a0de959", + "5d419a4d2acf48cc8b3626bfb63c4cd4", + "0504c191fa294f62854e3dc44eba1a1a", + "9f3f65b2cc274246ba55ca32bef5fc10", + "ff70550ed86146049a48faae2f9bf134" + ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "shy57P-F48iH" - }, - "source": [ - "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" - ] + "id": "21X5laS15hxa", + "outputId": "517832f5-f52c-40df-fd15-38341ce792a7" + }, + "outputs": [], + "source": [ + "setfit_zero_shot_news_dataset = [\n", + " rg.TextClassificationRecord(\n", + " text=res[\"sequence\"],\n", + " prediction=list(zip(res[\"labels\"], res[\"scores\"])),\n", + " annotation=record.prediction[0][0],\n", + " )\n", + " for res, record in zip(result, ds_record)\n", + "]\n", + "rg.log(name=\"setfit_zero_shot_news_dataset\", records=setfit_zero_shot_news_dataset)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can access the setfit_zero_shot_news_dataset in the Argilla UI:\n", + "\n", + "\"Model" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 }, + "id": "uCQeM8V35w1Q", + "outputId": "0fdcae45-0cfa-4674-f3ab-ea730da7d8f8" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "klxDkoErnaPI" - }, - "outputs": [], - "source": [ - "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", - "# Replace api_key if you configured a custom API key\n", - "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "f1_setfit_zero_shot = f1(\"setfit_zero_shot_news_dataset\")\n", + "f1_setfit_zero_shot.visualize()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3rH58lEpOPQt" + }, + "source": [ + "Now that we have computed the F1 score for each model with can create a comparison table:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "NJ6X5SehqYzX" + }, + "outputs": [], + "source": [ + "f_score = list(f1_setfit_zero_shot.data.keys())\n", + "f1_setfit_zero_shot_values = list(f1_setfit_zero_shot.data.values())\n", + "f1_zero_shot_values = list(f1_zero_shot.data.values())\n", + "\n", + "unnecessary_labels = [\"Sports_recall\", \"World_recall\", \"\"]\n", + "\n", + "df_results = pd.DataFrame(\n", + " {\n", + " \"f_score\": f_score,\n", + " \"zero-shot classification\": f1_zero_shot_values,\n", + " \"zero-shot SetFit classification\": f1_setfit_zero_shot_values,\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 739 }, + "id": "xvv8J-VBrzVE", + "outputId": "07a04080-a989-4e3d-fd80-d12c4a078078" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
f_scorezero-shot classificationzero-shot SetFit classification
0precision_macro0.5177540.663322
1recall_macro0.5296050.668816
2f1_macro0.5144830.663725
3precision_micro0.5296050.668816
4recall_micro0.5296050.668816
5f1_micro0.5296050.668816
6Sci/Tech_precision0.4769500.556291
7Sci/Tech_recall0.2831580.530526
8Sci/Tech_f10.3553500.543103
9Sci/Tech_support11400.0000007600.000000
10World_precision0.3679090.663734
11World_recall0.3584210.555789
12World_f10.3631030.604984
13World_support11400.0000007600.000000
14Business_precision0.4492270.620098
15Business_recall0.5657890.665789
16Business_f10.5008150.642132
17Business_support11400.0000007600.000000
18Sports_precision0.7769300.813166
19Sports_recall0.9110530.923158
20Sports_f10.8386630.864678
21Sports_support11400.0000007600.000000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " f_score zero-shot classification \\\n", + "0 precision_macro 0.517754 \n", + "1 recall_macro 0.529605 \n", + "2 f1_macro 0.514483 \n", + "3 precision_micro 0.529605 \n", + "4 recall_micro 0.529605 \n", + "5 f1_micro 0.529605 \n", + "6 Sci/Tech_precision 0.476950 \n", + "7 Sci/Tech_recall 0.283158 \n", + "8 Sci/Tech_f1 0.355350 \n", + "9 Sci/Tech_support 11400.000000 \n", + "10 World_precision 0.367909 \n", + "11 World_recall 0.358421 \n", + "12 World_f1 0.363103 \n", + "13 World_support 11400.000000 \n", + "14 Business_precision 0.449227 \n", + "15 Business_recall 0.565789 \n", + "16 Business_f1 0.500815 \n", + "17 Business_support 11400.000000 \n", + "18 Sports_precision 0.776930 \n", + "19 Sports_recall 0.911053 \n", + "20 Sports_f1 0.838663 \n", + "21 Sports_support 11400.000000 \n", + "\n", + " zero-shot SetFit classification \n", + "0 0.663322 \n", + "1 0.668816 \n", + "2 0.663725 \n", + "3 0.668816 \n", + "4 0.668816 \n", + "5 0.668816 \n", + "6 0.556291 \n", + "7 0.530526 \n", + "8 0.543103 \n", + "9 7600.000000 \n", + "10 0.663734 \n", + "11 0.555789 \n", + "12 0.604984 \n", + "13 7600.000000 \n", + "14 0.620098 \n", + "15 0.665789 \n", + "16 0.642132 \n", + "17 7600.000000 \n", + "18 0.813166 \n", + "19 0.923158 \n", + "20 0.864678 \n", + "21 7600.000000 " ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_results" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oGi3_YgFty_0" + }, + "source": [ + "*Results interpretation:* Without any hesitation the zero-shot classification using SetFit model is the more effective one. The F1 score for each class is better.\n", + "\n", + "The best-predicted class for both classifiers is **Sports**." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "03352da397fc457fa70dd5e489cc4844": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9424d0b9953b43638b5f0623ff225d79", + "IPY_MODEL_4e8ec2dc3b564cae937741ac7d47372a", + "IPY_MODEL_58a2528e6a7646089e77174b5ef6d720" + ], + "layout": "IPY_MODEL_301a91403585451cad67df96ada457b5" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Set the HF_TOKEN environment variable\n", - "# import os\n", - "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", - "\n", - "# # Replace api_url with the url to your HF Spaces URL\n", - "# # Replace api_key if you configured a custom API key\n", - "# # Replace workspace with the name of your workspace\n", - "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", - "# api_key=\"owner.apikey\",\n", - "# workspace=\"admin\",\n", - "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", - "# )" - ] + "03cb83f82a494893bb8bb94ea588fb62": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "MQibTErO5De0" - }, - "source": [ - "For this tutorial the HugginFace [ag_news](https://huggingface.co/datasets/ag_news) dataset is chosen:" - ] + "03fc89e7ed73469f9641f0e27ff60920": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_65f152c3f69f4e0781bea0287d5f8f14", + "max": 80, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6fdf42751cd74adf857773c33e2d8e21", + "value": 80 + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "teaiKrR1ng15", - "outputId": 
"381f5715-9f37-46b8-b45f-ec767c906fc7" - }, - "outputs": [], - "source": [ - "news_dataset = load_dataset(\"ag_news\", split=\"test\")" - ] + "0504c191fa294f62854e3dc44eba1a1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "sA2WK_px6pa1" - }, - "source": [ - "This dataset is composed of two columns, one is the text of the news article and the other one is the label associated with this text article:\n", - "\n", - "*For this tutorial, we will consider the label as the annotation of the text*\n", - "\n" - ] + "09a5640c9f9f4d1cb2d320e0f7577469": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "blvo7FZz8ccU" - }, - "source": [ - "We transform our dataset in order to create an argilla TextClassificationDataset:" - ] + "0ed8954de1e0470a87bba037516159ce": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ttt0aUJ7pYzz", - "outputId": "15f7798d-6ad7-45f5-82a6-225088ee0076" - }, - "outputs": [], - "source": [ - "int_to_label = {\n", - " 0:\"World\",\n", - " 1:\"Sports\",\n", - " 2:\"Business\",\n", - " 3:\"Sci/Tech\",\n", - "}\n", - "\n", - "news_dataset = news_dataset.map(lambda row: {\"prediction\": [{\"label\":int_to_label[row[\"label\"]], \"score\":1}]})" - ] + "14e4c79dd65d4fafb02701e775873e2a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "u687F37SoFn8", - "outputId": "3f111e4d-97c4-41fe-b595-80b4ccda4520" - }, - "outputs": [], - "source": [ - "ds_record = rg.read_datasets(dataset=news_dataset, task=\"TextClassification\")" - ] + "269c0b2820b44ba7844f451dc6b7ce62": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Enable Telemetry\n", - "\n", - "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." - ] + "273687d63b794ce3982fcc4b32dc40ac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5d419a4d2acf48cc8b3626bfb63c4cd4", + "max": 7600, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0504c191fa294f62854e3dc44eba1a1a", + "value": 7600 + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " from argilla.utils.telemetry import tutorial_running\n", - " tutorial_running()\n", - "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\")" - ] + "301a91403585451cad67df96ada457b5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "TEyjYZHAIV9h" - }, - "source": [ - "## Make Zero Shot Text Classification predictions using transformers\n", - "\n", - "On HugginFace we choose the model `cross-encoder/nli-distilroberta-base` that is trained to perform zero-shot classification.\n", - "We create a pipeline with this model and then perform prediction.\n", - "\n", - "*note: `device=0` in pipeline() permits to use GPU if you do not have a GPU available delete this parameter*\n", - "\n", - "\n" - ] + "371ebf2451ca4045aa8c7c29aa80b81f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03cb83f82a494893bb8bb94ea588fb62", + "placeholder": "​", + "style": "IPY_MODEL_f0fab85f8e8d4d1ca46bfbe14d50fe57", + "value": " 80/80 [00:04<00:00, 18.35it/s]" + } }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "UW4am9A7n4mI" - }, - "outputs": [], - "source": [ - "labels =[\"Sports\", \"Sci/Tech\", \"Business\", \"World\"]\n", - "\n", - "pipe = pipeline(\"zero-shot-classification\", model='cross-encoder/nli-distilroberta-base', device=0)\n", - "result = []\n", - "with pipe.device_placement():\n", - " result = pipe(\n", - " [data.text for data in ds_record],\n", - " candidate_labels=labels,\n", - " )" - ] + "4adcf6345d2f48a1b8f912ba2813b701": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_50581d62125c4153b6cc9d3f43a3161e", + "placeholder": "​", + "style": "IPY_MODEL_4b4f86bb0983441288adee5e7a0de959", + "value": "100%" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "BnT8GZmWKl7S" - }, - "source": [ - "Now that predictions are successfully made with the zero-shot model we can transform it into a list of argilla TextClassificationRecord and upload it to our argilla client" - ] + "4b4f86bb0983441288adee5e7a0de959": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 85, - "referenced_widgets": [ - "03352da397fc457fa70dd5e489cc4844", - "9424d0b9953b43638b5f0623ff225d79", - "4e8ec2dc3b564cae937741ac7d47372a", - "58a2528e6a7646089e77174b5ef6d720", - "301a91403585451cad67df96ada457b5", - "b2266a02a7154eb9a7a5ea36734a3902", - "cfb6680ece1946a990572948ecc15941", - "d851b02df22d40528579462ac2649e9c", - "bc75ab7805a041ad91ffcc6cf878212d", - "99ee702176d6453d9df40b1278a3dc0d", - "09a5640c9f9f4d1cb2d320e0f7577469" - ] - }, - "id": "ft7e8RApyW0u", - "outputId": "b7414e09-d2a5-4c8e-9dd2-364b53f8ee70" - }, - "outputs": [], - "source": [ - "zero_shot_news_dataset = [\n", - " rg.TextClassificationRecord(\n", - " text=res[\"sequence\"],\n", - " prediction=list(zip(res['labels'],res['scores'])),\n", - " annotation=record.prediction[0][0],\n", - " ) for res, record in zip(result, ds_record)\n", - "]\n", - "\n", - "rg.log(name=\"zero_shot_news_dataset\", records=zero_shot_news_dataset)" - ] + "4e8ec2dc3b564cae937741ac7d47372a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d851b02df22d40528579462ac2649e9c", + "max": 7600, + 
"min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bc75ab7805a041ad91ffcc6cf878212d", + "value": 7600 + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "T_weCa-YPAzF" - }, - "source": [ - "You can access the zero_shot_news_dataset in the Argilla UI:\n", - "\n", - "\"Model" - ] + "50581d62125c4153b6cc9d3f43a3161e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z7GTj9I9K-mf" - }, - "source": [ - "Finally, we measure the performance of our model using the argilla f1 function that computes the F1 score:" - ] + "58a2528e6a7646089e77174b5ef6d720": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_99ee702176d6453d9df40b1278a3dc0d", + "placeholder": "​", + "style": "IPY_MODEL_09a5640c9f9f4d1cb2d320e0f7577469", + "value": " 7600/7600 [00:31<00:00, 218.52it/s]" + } }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 542 - }, - "id": "1iShwCbj3n-l", - "outputId": "b02aa4e4-cdc9-4d07-e2e0-f7d794b45706" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "5ab6d3b8e01b4858bcd2ef38475265c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5d419a4d2acf48cc8b3626bfb63c4cd4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "65f152c3f69f4e0781bea0287d5f8f14": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6fdf42751cd74adf857773c33e2d8e21": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9424d0b9953b43638b5f0623ff225d79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": 
null, + "layout": "IPY_MODEL_b2266a02a7154eb9a7a5ea36734a3902", + "placeholder": "​", + "style": "IPY_MODEL_cfb6680ece1946a990572948ecc15941", + "value": "100%" + } + }, + "981710df13494b958e9e3fb366bd53d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0ed8954de1e0470a87bba037516159ce", + "placeholder": "​", + "style": "IPY_MODEL_ef745b03cfec4cd793b852d29185d042", + "value": " 1/1 [00:04<00:00, 4.40s/it]" + } + }, + "99ee702176d6453d9df40b1278a3dc0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + 
"padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9d7425ebb4854fc4b4c957a7a8805b0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4adcf6345d2f48a1b8f912ba2813b701", + "IPY_MODEL_273687d63b794ce3982fcc4b32dc40ac", + "IPY_MODEL_ef39075183f74473bbfcfb08af09c484" ], - "source": [ - "f1_zero_shot = f1(\"zero_shot_news_dataset\")\n", - "f1_zero_shot.visualize()" - ] + "layout": "IPY_MODEL_d91f02b4b9aa43ebaa6be94fb5465ada" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "l4gY2nuk2c0G" - }, - "source": [ - "## Make zero-shot text classification using a trained SetFit classifier" - ] + "9f3f65b2cc274246ba55ca32bef5fc10": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The imports needed:" - ] + "9f6b243e854d40de8339d5920ea9b2c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cb9c3a0c0ba04bc6a062e696ec4616b7", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c7ba9a5b20fc4af2b2a044585e93e5ec", + "value": 1 + } }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "n5UcoG5l3lRy" - }, - "outputs": [], - "source": [ - "from setfit import SetFitModel, SetFitTrainer, get_templated_dataset" - ] + "a9eb439d911346c1bc523d1719f96c72": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_14e4c79dd65d4fafb02701e775873e2a", + "placeholder": "​", + "style": "IPY_MODEL_cf2c6b3aa42343a1b6ea765a17b133e5", + "value": "Iteration: 100%" 
+ } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We create a synthetic dataset of training examples using our dataset's labels" - ] + "b2266a02a7154eb9a7a5ea36734a3902": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "8DTm1BqI2dZp" - }, - "outputs": [], - "source": [ - "labels = [\"Sports\", \"Sci/Tech\", \"Business\", \"World\"]\n", - "train_dataset = get_templated_dataset(\n", - " candidate_labels=labels,\n", - " sample_size=8,\n", - " template=\"The news article is about {}\"\n", - ")" - ] + "bc75ab7805a041ad91ffcc6cf878212d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We train a SetFitModel using the pretrained model 'all-MiniLM-L6-v2'" - ] + "c7ba9a5b20fc4af2b2a044585e93e5ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 244, - "referenced_widgets": [ - "e90626dee9ad4610a2d3b04c5aa7cfd8", - "f249c3c6f884440db96d66715cd48247", - "9f6b243e854d40de8339d5920ea9b2c8", - "981710df13494b958e9e3fb366bd53d4", - "269c0b2820b44ba7844f451dc6b7ce62", - "d4bbeddf2bf04fc88c02995657c58410", - "5ab6d3b8e01b4858bcd2ef38475265c0", - "cb9c3a0c0ba04bc6a062e696ec4616b7", - "c7ba9a5b20fc4af2b2a044585e93e5ec", - "0ed8954de1e0470a87bba037516159ce", - "ef745b03cfec4cd793b852d29185d042", - "f773cd8ad7df481e9e606e245c37c575", - "a9eb439d911346c1bc523d1719f96c72", - "03fc89e7ed73469f9641f0e27ff60920", - "371ebf2451ca4045aa8c7c29aa80b81f", - "ffc76bd466ed46518d5511ceb0cb9086", - "14e4c79dd65d4fafb02701e775873e2a", - "cf2c6b3aa42343a1b6ea765a17b133e5", - "65f152c3f69f4e0781bea0287d5f8f14", - "6fdf42751cd74adf857773c33e2d8e21", - "03cb83f82a494893bb8bb94ea588fb62", - "f0fab85f8e8d4d1ca46bfbe14d50fe57" - ] - }, - "id": "YtLxfwJd7u5c", - "outputId": "1855f46b-e01c-4f39-99af-166d5e9b944c" - }, - 
"outputs": [], - "source": [ - "model = SetFitModel.from_pretrained(\"all-MiniLM-L6-v2\")\n", - "trainer = SetFitTrainer(\n", - " model=model,\n", - " train_dataset=train_dataset\n", - ")\n", - "trainer.train()" - ] + "cb9c3a0c0ba04bc6a062e696ec4616b7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We are then able to compute the text classification: " - ] + "cf2c6b3aa42343a1b6ea765a17b133e5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "ytsi4_No0_pd" - }, - "outputs": [], - "source": [ - "result = [{\n", - " 'sequence': data[\"text\"],\n", - " 'scores': model.predict_proba([data[\"text\"]]).squeeze().numpy(),\n", - " 'labels': labels\n", - " } for data in news_dataset] " - ] + "cfb6680ece1946a990572948ecc15941": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we can log the result dataset into Argilla and compute the F1 score:" - ] + "d4bbeddf2bf04fc88c02995657c58410": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, 
+ "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 85, - "referenced_widgets": [ - "9d7425ebb4854fc4b4c957a7a8805b0f", - "4adcf6345d2f48a1b8f912ba2813b701", - "273687d63b794ce3982fcc4b32dc40ac", - "ef39075183f74473bbfcfb08af09c484", - "d91f02b4b9aa43ebaa6be94fb5465ada", - "50581d62125c4153b6cc9d3f43a3161e", - "4b4f86bb0983441288adee5e7a0de959", - "5d419a4d2acf48cc8b3626bfb63c4cd4", - "0504c191fa294f62854e3dc44eba1a1a", - "9f3f65b2cc274246ba55ca32bef5fc10", - "ff70550ed86146049a48faae2f9bf134" - ] - }, - "id": "21X5laS15hxa", - "outputId": "517832f5-f52c-40df-fd15-38341ce792a7" - }, - "outputs": [], - "source": [ - "setfit_zero_shot_news_dataset = [\n", - " rg.TextClassificationRecord(\n", - " text=res[\"sequence\"],\n", - " prediction=list(zip(res['labels'],res['scores'])),\n", - " annotation=record.prediction[0][0],\n", - " ) for res, record in zip(result, ds_record)\n", - "]\n", - "rg.log(name=\"setfit_zero_shot_news_dataset\", records=setfit_zero_shot_news_dataset)" - ] + "d851b02df22d40528579462ac2649e9c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can access the setfit_zero_shot_news_dataset in the Argilla UI:\n", - "\n", - "\"Model" - ] + "d91f02b4b9aa43ebaa6be94fb5465ada": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, 
+ "width": null + } }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 542 - }, - "id": "uCQeM8V35w1Q", - "outputId": "0fdcae45-0cfa-4674-f3ab-ea730da7d8f8" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "e90626dee9ad4610a2d3b04c5aa7cfd8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f249c3c6f884440db96d66715cd48247", + "IPY_MODEL_9f6b243e854d40de8339d5920ea9b2c8", + "IPY_MODEL_981710df13494b958e9e3fb366bd53d4" ], - "source": [ - "f1_setfit_zero_shot = f1(\"setfit_zero_shot_news_dataset\")\n", - "f1_setfit_zero_shot.visualize()" - ] + "layout": "IPY_MODEL_269c0b2820b44ba7844f451dc6b7ce62" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "3rH58lEpOPQt" - }, - "source": [ - "Now that we have computed the F1 score for each model with can create a comparison table:\n", - "\n" - ] + "ef39075183f74473bbfcfb08af09c484": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9f3f65b2cc274246ba55ca32bef5fc10", + "placeholder": "​", + "style": "IPY_MODEL_ff70550ed86146049a48faae2f9bf134", + "value": " 7600/7600 [00:34<00:00, 167.51it/s]" + } }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "NJ6X5SehqYzX" - }, - "outputs": [], - "source": [ - "f_score = list(f1_setfit_zero_shot.data.keys())\n", - "f1_setfit_zero_shot_values = 
list(f1_setfit_zero_shot.data.values())\n", - "f1_zero_shot_values = list(f1_zero_shot.data.values())\n", - "\n", - "unnecessary_labels = [\"Sports_recall\", \"World_recall\", \"\"]\n", - "\n", - "df_results = pd.DataFrame({\"f_score\": f_score, \"zero-shot classification\": f1_zero_shot_values, \"zero-shot SetFit classification\": f1_setfit_zero_shot_values})" - ] + "ef745b03cfec4cd793b852d29185d042": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 739 - }, - "id": "xvv8J-VBrzVE", - "outputId": "07a04080-a989-4e3d-fd80-d12c4a078078" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
f_scorezero-shot classificationzero-shot SetFit classification
0precision_macro0.5177540.663322
1recall_macro0.5296050.668816
2f1_macro0.5144830.663725
3precision_micro0.5296050.668816
4recall_micro0.5296050.668816
5f1_micro0.5296050.668816
6Sci/Tech_precision0.4769500.556291
7Sci/Tech_recall0.2831580.530526
8Sci/Tech_f10.3553500.543103
9Sci/Tech_support11400.0000007600.000000
10World_precision0.3679090.663734
11World_recall0.3584210.555789
12World_f10.3631030.604984
13World_support11400.0000007600.000000
14Business_precision0.4492270.620098
15Business_recall0.5657890.665789
16Business_f10.5008150.642132
17Business_support11400.0000007600.000000
18Sports_precision0.7769300.813166
19Sports_recall0.9110530.923158
20Sports_f10.8386630.864678
21Sports_support11400.0000007600.000000
\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ], - "text/plain": [ - " f_score zero-shot classification \\\n", - "0 precision_macro 0.517754 \n", - "1 recall_macro 0.529605 \n", - "2 f1_macro 0.514483 \n", - "3 precision_micro 0.529605 \n", - "4 recall_micro 0.529605 \n", - "5 f1_micro 0.529605 \n", - "6 Sci/Tech_precision 0.476950 \n", - "7 Sci/Tech_recall 0.283158 \n", - "8 Sci/Tech_f1 0.355350 \n", - "9 Sci/Tech_support 11400.000000 \n", - "10 World_precision 0.367909 \n", - "11 World_recall 0.358421 \n", - "12 World_f1 0.363103 \n", - "13 World_support 11400.000000 \n", - "14 Business_precision 0.449227 \n", - "15 Business_recall 0.565789 \n", - "16 Business_f1 0.500815 \n", - "17 Business_support 11400.000000 \n", - "18 Sports_precision 0.776930 \n", - "19 Sports_recall 0.911053 \n", - "20 Sports_f1 0.838663 \n", - "21 Sports_support 11400.000000 \n", - "\n", - " zero-shot SetFit classification \n", - "0 0.663322 \n", - "1 0.668816 \n", - "2 0.663725 \n", - "3 0.668816 \n", - "4 0.668816 \n", - "5 0.668816 \n", - "6 0.556291 \n", - "7 0.530526 \n", - "8 0.543103 \n", - "9 7600.000000 \n", - "10 0.663734 \n", - "11 0.555789 \n", - "12 0.604984 \n", - "13 7600.000000 \n", - "14 0.620098 \n", - "15 0.665789 \n", - "16 0.642132 \n", - "17 7600.000000 \n", - "18 0.813166 \n", - "19 0.923158 \n", - "20 0.864678 \n", - "21 7600.000000 " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_results" - ] + "f0fab85f8e8d4d1ca46bfbe14d50fe57": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "oGi3_YgFty_0" - }, - 
"source": [ - "*Results interpretation:* Without any hesitation the zero-shot classification using SetFit model is the more effective one. The F1 score for each class is better.\n", - "\n", - "The best-predicted class for both classifiers is **Sports**." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "provenance": [] + "f249c3c6f884440db96d66715cd48247": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d4bbeddf2bf04fc88c02995657c58410", + "placeholder": "​", + "style": "IPY_MODEL_5ab6d3b8e01b4858bcd2ef38475265c0", + "value": "Epoch: 100%" + } }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + "f773cd8ad7df481e9e606e245c37c575": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a9eb439d911346c1bc523d1719f96c72", + "IPY_MODEL_03fc89e7ed73469f9641f0e27ff60920", + "IPY_MODEL_371ebf2451ca4045aa8c7c29aa80b81f" + ], + "layout": "IPY_MODEL_ffc76bd466ed46518d5511ceb0cb9086" + } }, - "language_info": { - "name": "python" + "ff70550ed86146049a48faae2f9bf134": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "03352da397fc457fa70dd5e489cc4844": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9424d0b9953b43638b5f0623ff225d79", - "IPY_MODEL_4e8ec2dc3b564cae937741ac7d47372a", - "IPY_MODEL_58a2528e6a7646089e77174b5ef6d720" - ], - "layout": "IPY_MODEL_301a91403585451cad67df96ada457b5" - } - }, - "03cb83f82a494893bb8bb94ea588fb62": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": 
null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "03fc89e7ed73469f9641f0e27ff60920": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_65f152c3f69f4e0781bea0287d5f8f14", - "max": 80, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6fdf42751cd74adf857773c33e2d8e21", - "value": 80 - } - }, - "0504c191fa294f62854e3dc44eba1a1a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "09a5640c9f9f4d1cb2d320e0f7577469": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0ed8954de1e0470a87bba037516159ce": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "14e4c79dd65d4fafb02701e775873e2a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "269c0b2820b44ba7844f451dc6b7ce62": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "273687d63b794ce3982fcc4b32dc40ac": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5d419a4d2acf48cc8b3626bfb63c4cd4", - "max": 7600, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0504c191fa294f62854e3dc44eba1a1a", - "value": 7600 - } - }, - "301a91403585451cad67df96ada457b5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "371ebf2451ca4045aa8c7c29aa80b81f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_03cb83f82a494893bb8bb94ea588fb62", - "placeholder": "​", - "style": "IPY_MODEL_f0fab85f8e8d4d1ca46bfbe14d50fe57", - "value": " 80/80 [00:04<00:00, 18.35it/s]" - } - }, - "4adcf6345d2f48a1b8f912ba2813b701": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_50581d62125c4153b6cc9d3f43a3161e", - "placeholder": "​", - "style": "IPY_MODEL_4b4f86bb0983441288adee5e7a0de959", - "value": "100%" - } - }, - "4b4f86bb0983441288adee5e7a0de959": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4e8ec2dc3b564cae937741ac7d47372a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - 
"description_tooltip": null, - "layout": "IPY_MODEL_d851b02df22d40528579462ac2649e9c", - "max": 7600, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bc75ab7805a041ad91ffcc6cf878212d", - "value": 7600 - } - }, - "50581d62125c4153b6cc9d3f43a3161e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "58a2528e6a7646089e77174b5ef6d720": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_99ee702176d6453d9df40b1278a3dc0d", - "placeholder": "​", - "style": "IPY_MODEL_09a5640c9f9f4d1cb2d320e0f7577469", - "value": " 7600/7600 [00:31<00:00, 218.52it/s]" - } - }, - "5ab6d3b8e01b4858bcd2ef38475265c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5d419a4d2acf48cc8b3626bfb63c4cd4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "65f152c3f69f4e0781bea0287d5f8f14": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6fdf42751cd74adf857773c33e2d8e21": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "9424d0b9953b43638b5f0623ff225d79": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b2266a02a7154eb9a7a5ea36734a3902", - "placeholder": "​", - "style": "IPY_MODEL_cfb6680ece1946a990572948ecc15941", - "value": "100%" - } - }, - "981710df13494b958e9e3fb366bd53d4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0ed8954de1e0470a87bba037516159ce", - "placeholder": "​", - "style": "IPY_MODEL_ef745b03cfec4cd793b852d29185d042", - "value": " 1/1 [00:04<00:00, 4.40s/it]" - } - }, - "99ee702176d6453d9df40b1278a3dc0d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9d7425ebb4854fc4b4c957a7a8805b0f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4adcf6345d2f48a1b8f912ba2813b701", - "IPY_MODEL_273687d63b794ce3982fcc4b32dc40ac", - "IPY_MODEL_ef39075183f74473bbfcfb08af09c484" - ], - "layout": "IPY_MODEL_d91f02b4b9aa43ebaa6be94fb5465ada" - } - }, - "9f3f65b2cc274246ba55ca32bef5fc10": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - 
"overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9f6b243e854d40de8339d5920ea9b2c8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cb9c3a0c0ba04bc6a062e696ec4616b7", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c7ba9a5b20fc4af2b2a044585e93e5ec", - "value": 1 - } - }, - "a9eb439d911346c1bc523d1719f96c72": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_14e4c79dd65d4fafb02701e775873e2a", - "placeholder": "​", - "style": "IPY_MODEL_cf2c6b3aa42343a1b6ea765a17b133e5", - "value": "Iteration: 100%" - } - }, - "b2266a02a7154eb9a7a5ea36734a3902": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - 
"align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bc75ab7805a041ad91ffcc6cf878212d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c7ba9a5b20fc4af2b2a044585e93e5ec": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "cb9c3a0c0ba04bc6a062e696ec4616b7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cf2c6b3aa42343a1b6ea765a17b133e5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cfb6680ece1946a990572948ecc15941": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d4bbeddf2bf04fc88c02995657c58410": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d851b02df22d40528579462ac2649e9c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d91f02b4b9aa43ebaa6be94fb5465ada": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e90626dee9ad4610a2d3b04c5aa7cfd8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f249c3c6f884440db96d66715cd48247", - "IPY_MODEL_9f6b243e854d40de8339d5920ea9b2c8", - "IPY_MODEL_981710df13494b958e9e3fb366bd53d4" - ], - "layout": "IPY_MODEL_269c0b2820b44ba7844f451dc6b7ce62" - } - }, - "ef39075183f74473bbfcfb08af09c484": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9f3f65b2cc274246ba55ca32bef5fc10", - "placeholder": "​", - "style": "IPY_MODEL_ff70550ed86146049a48faae2f9bf134", - "value": " 7600/7600 [00:34<00:00, 167.51it/s]" - } - }, - "ef745b03cfec4cd793b852d29185d042": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f0fab85f8e8d4d1ca46bfbe14d50fe57": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - 
"f249c3c6f884440db96d66715cd48247": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d4bbeddf2bf04fc88c02995657c58410", - "placeholder": "​", - "style": "IPY_MODEL_5ab6d3b8e01b4858bcd2ef38475265c0", - "value": "Epoch: 100%" - } - }, - "f773cd8ad7df481e9e606e245c37c575": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_a9eb439d911346c1bc523d1719f96c72", - "IPY_MODEL_03fc89e7ed73469f9641f0e27ff60920", - "IPY_MODEL_371ebf2451ca4045aa8c7c29aa80b81f" - ], - "layout": "IPY_MODEL_ffc76bd466ed46518d5511ceb0cb9086" - } - }, - "ff70550ed86146049a48faae2f9bf134": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ffc76bd466ed46518d5511ceb0cb9086": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } + "ffc76bd466ed46518d5511ceb0cb9086": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials/notebooks/monitoring-textclassification-shaptransformersinterpret-explainability.ipynb b/docs/_source/tutorials/notebooks/monitoring-textclassification-shaptransformersinterpret-explainability.ipynb index e5a67f323a..ea654f1b14 100644 --- a/docs/_source/tutorials/notebooks/monitoring-textclassification-shaptransformersinterpret-explainability.ipynb +++ b/docs/_source/tutorials/notebooks/monitoring-textclassification-shaptransformersinterpret-explainability.ipynb @@ -105,15 +105,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -122,6 +119,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -133,7 +131,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# 
api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -175,6 +173,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -185,14 +184,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -260,21 +263,25 @@ "# Define the explainer using transformers_interpret\n", "cls_explainer = SequenceClassificationExplainer(model, tokenizer)\n", "\n", + "\n", "# Remove overlapping ##tokens\n", "def merge_word_attributions(word_attributions):\n", - " sentence =[]\n", + " sentence = []\n", " i = 0\n", " for attribution in word_attributions:\n", " word = attribution[0]\n", - " score = attribution[1]\n", - " \n", + " score = attribution[1]\n", + "\n", " if \"##\" in word:\n", - " previous_word = sentence[i-1][0]\n", - " previous_score = sentence[i-1][1]\n", - " sentence[i-1] = (previous_word+ word[2:], previous_score if abs(previous_score) > abs(score) else score)\n", + " previous_word = sentence[i - 1][0]\n", + " previous_score = sentence[i - 1][1]\n", + " sentence[i - 1] = (\n", + " previous_word + word[2:],\n", + " previous_score if abs(previous_score) > abs(score) else score,\n", + " )\n", " else:\n", " sentence.append(attribution)\n", - " i+=1\n", + " i += 1\n", " return sentence\n", "\n", "\n", @@ -338,6 +345,7 @@ "outputs": [], "source": [ "from pygments.token 
import Text\n", + "\n", "# Let's use a sentiment classifier fine-tuned on sst\n", "model_name = \"distilbert-base-uncased-finetuned-sst-2-english\"\n", "model = AutoModelForSequenceClassification.from_pretrained(model_name)\n", @@ -350,11 +358,7 @@ "\n", "# Transformers pipeline model\n", "pipeline = transformers.pipeline(\n", - " \"sentiment-analysis\", \n", - " model=model, \n", - " tokenizer=tokenizer, \n", - " device=device,\n", - " top_k=None\n", + " \"sentiment-analysis\", model=model, tokenizer=tokenizer, device=device, top_k=None\n", ")\n", "\n", "# Load Stanford treebank dataset only the first 100 records for testing\n", diff --git a/docs/_source/tutorials/notebooks/monitoring-textclassification-transformers-explainability.ipynb b/docs/_source/tutorials/notebooks/monitoring-textclassification-transformers-explainability.ipynb index 7d17bce6f4..72ffb45c6d 100644 --- a/docs/_source/tutorials/notebooks/monitoring-textclassification-transformers-explainability.ipynb +++ b/docs/_source/tutorials/notebooks/monitoring-textclassification-transformers-explainability.ipynb @@ -135,15 +135,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -152,6 +149,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -163,7 +161,7 @@ "# # Replace api_key if you configured a custom API 
key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -190,11 +188,12 @@ "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", "from datasets import load_dataset, Dataset, Features, Value, ClassLabel\n", "from transformers.data.data_collator import DataCollatorWithPadding\n", - "import pandas as pd " + "import pandas as pd" ] }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -205,14 +204,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -246,7 +249,6 @@ }, "outputs": [], "source": [ - "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "# Load model and tokenizer\n", @@ -259,10 +261,12 @@ "# Load the training split\n", "ds = load_dataset(\"ag_news\", split=\"train\")\n", "\n", + "\n", "# Tokenize and encode the training set\n", "def tokenize_and_encode(batch):\n", " return tokenizer(batch[\"text\"], truncation=True)\n", "\n", + "\n", "ds_enc = ds.map(tokenize_and_encode, batched=True)" ] }, @@ -296,6 +300,7 @@ "# Create the data collator for inference\n", "data_collator = DataCollatorWithPadding(tokenizer, padding=True)\n", "\n", + "\n", "# Function to compute the loss example-wise\n", "def loss_per_example(batch):\n", " batch = data_collator(batch)\n", @@ -497,7 +502,7 @@ "\n", "# Add the text column removed by the trainer\n", "losses_df[\"text\"] = ds_enc[\"text\"]\n", - "losses_df.sort_values(\"loss\", ascending=False).head(10)\n" + "losses_df.sort_values(\"loss\", ascending=False).head(10)" ] }, { @@ -575,6 +580,7 @@ " annotation_agent=\"ag_news_benchmark\",\n", " )\n", "\n", + "\n", "# If you want to log the full dataset remove the indexing\n", "top_losses = losses_df.sort_values(\"loss\", ascending=False)[0:499]\n", "\n", diff --git a/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb b/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb index 4d55a25b92..7f8fd0effd 100644 --- a/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb +++ b/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb @@ -1,5094 +1,5110 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "CCsbozBWRESe" - }, - "source": [ - "# 🤔 **Fine-tunning a NER model with BERT for Beginners**\n", - "\n", - "\n", - "Are you a beginner? Do you want to learn, but don't know where to start? 
In this tutorial, you will learn to fine-tune a pre-trained BERT model for Named Entity Recognition. It will walk you through the following steps:\n", - "\n", - "- 🚀 Load your training dataset into Argilla and explore it using its tools.\n", - "- ⏳ Preprocess the data to generate the other inputs required by the model, and put them in a format that the model expects.\n", - "- 🔍 Download the BERT model and start to fine-tune it.\n", - "- 🧪 Perform your own tests!" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "CCsbozBWRESe" + }, + "source": [ + "# 🤔 **Fine-tunning a NER model with BERT for Beginners**\n", + "\n", + "\n", + "Are you a beginner? Do you want to learn, but don't know where to start? In this tutorial, you will learn to fine-tune a pre-trained BERT model for Named Entity Recognition. It will walk you through the following steps:\n", + "\n", + "- 🚀 Load your training dataset into Argilla and explore it using its tools.\n", + "- ⏳ Preprocess the data to generate the other inputs required by the model, and put them in a format that the model expects.\n", + "- 🔍 Download the BERT model and start to fine-tune it.\n", + "- 🧪 Perform your own tests!" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "10I7EROxWPqy" + }, + "source": [ + "\"NER\"\n"," + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gs0Vhje60q4p" + }, + "source": [ + "## Introduction\n", + "\n", + "Our goal is to show from a trainig dataset how to fine-tune a **tiny BERT model** in order to identify **NER** tags.\n", + "\n", + "For this purpose, we will first connect to Argilla and log our [dataset](https://huggingface.co/datasets/argilla/spacy_sm_wnut17), so that we can analyse it in a more visual way.\n", + "\n", + ">💡 **Tip:** If you want to try with a different dataset than the one in this tutorial, but it's not yet annotated, Argilla has several tutorials on how to do it [manually](/practical_guides/annotate_dataset.html) or [automatically](https://docs.v1.argilla.io/en/latest/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.html#Appendix:-Log-datasets-to-the-Hugging-Face-Hub).\n", + "\n", + "\n", + "Next, we will preprocess our dataset and fine-tune the model. Here we will be using [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert), to make it easier to understand it and start *playing* with the parameters easily. However, there are still plenty of similar ones to [discover](https://huggingface.co/docs/transformers/index#bigtable).\n", + "\n", + "✨Let's get started!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WnKU83tp0q4q" + }, + "source": [ + "## Running Argilla\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. There are two main options for deploying and running Argilla:\n", + "\n", + "1. [Deploy Argilla on Hugging Face Spaces](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla): This is the fastest option and the recommended choice for connecting to external notebooks (e.g., Google Colab) if you have an account on Hugging Face.\n", + "\n", + "

\n", + " \n", + " \"deploy\n", + " \n", + "

\n", + "\n", + "2. [Launch Argilla using Argilla's quickstart Docker image](../../getting_started/quickstart.ipynb): This is the recommended option if you want Argilla running on your local machine. Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", + "\n", + "For more information on deployment options, please check the Deployment section of the documentation.\n", + "\n", + "
\n", + "\n", + ">🤯 **Tip**\n", + "\n", + "> This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KBaIylKyh1mF" + }, + "source": [ + "## Setup\n", + "\n", + "For this tutorial, you'll need to install the Argilla client and a few third party libraries using `pip`:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "80Xv9vqx0q4r", + "outputId": "1aa71f15-ed36-4feb-c909-5c7feaeb638e" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "10I7EROxWPqy" - }, - "source": [ - "\"NER\"\n"," - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m238.1/238.1 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m214.7/214.7 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m385.3/385.3 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.6/51.6 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m525.6/525.6 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.9/69.9 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.3/64.3 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.6/69.6 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.6/49.6 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.1/143.1 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m593.7/593.7 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.9/142.9 kB\u001b[0m \u001b[31m20.5 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.1/51.1 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m428.8/428.8 kB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.1/4.1 MB\u001b[0m \u001b[31m49.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m50.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting datasets\n", + " Downloading datasets-2.14.4-py3-none-any.whl (519 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", + "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", + "Requirement already satisfied: 
requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n", + "Collecting xxhash (from datasets)\n", + " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multiprocess (from datasets)\n", + " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n", + "Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)\n", + " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from 
aiohttp->datasets) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.7.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", + "Installing collected packages: xxhash, dill, multiprocess, huggingface-hub, datasets\n", + "Successfully installed datasets-2.14.4 dill-0.3.7 huggingface-hub-0.16.4 multiprocess-0.70.15 xxhash-3.3.0\n", + "Collecting transformers\n", + " Downloading transformers-4.33.0-py3-none-any.whl 
(7.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", + "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n", + " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m48.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n", + " Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from 
huggingface-hub<1.0,>=0.15.1->transformers) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers) (4.7.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n", + "Installing collected packages: tokenizers, safetensors, transformers\n", + "Successfully installed safetensors-0.3.3 tokenizers-0.13.3 transformers-4.33.0\n", + "Collecting evaluate\n", + " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: datasets>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2.14.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.23.5)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.3.7)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from evaluate) (4.66.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages 
(from evaluate) (3.3.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.70.15)\n", + "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2023.6.0)\n", + "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.16.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from evaluate) (23.1)\n", + "Collecting responses<0.19 (from evaluate)\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (9.0.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (3.8.5)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (6.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (3.12.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.7.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2023.7.22)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in 
/usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2023.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->evaluate) (1.16.0)\n", + "Installing collected packages: responses, evaluate\n", + "Successfully installed evaluate-0.4.0 responses-0.18.0\n", + "Collecting seqeval\n", + " Downloading seqeval-1.2.2.tar.gz (43 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.23.5)\n", + "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n", + "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.10.1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.2.0)\n", + "Building wheels for collected packages: seqeval\n", + " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=a3e4deed0ae4f82793ec07d332ea0faca9b72401ac85aa8047235d5fec9ef8ce\n", + " Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n", + "Successfully built seqeval\n", + "Installing collected packages: seqeval\n", + "Successfully installed seqeval-1.2.2\n", + "Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.33.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.12.2)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.16.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0.1)\n", 
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2023.6.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.31.0)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.13.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.3.3)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.66.1)\n", + "Requirement already satisfied: torch!=1.12.0,>=1.10 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.0.1+cu118)\n", + "Collecting accelerate>=0.20.3 (from transformers[torch])\n", + " Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[torch]) (5.9.5)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers[torch]) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers[torch]) (4.7.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from 
torch!=1.12.0,>=1.10->transformers[torch]) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.10->transformers[torch]) (3.27.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.10->transformers[torch]) (16.0.6)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2023.7.22)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.12.0,>=1.10->transformers[torch]) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.12.0,>=1.10->transformers[torch]) (1.3.0)\n", + "Installing collected packages: accelerate\n", + "Successfully installed accelerate-0.22.0\n", + "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.22.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", + "Requirement 
already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", + "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.0.1+cu118)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.12.2)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.7.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (3.27.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (16.0.6)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n" + ] + } + ], + "source": [ + "%pip install \"argilla[server]==1.5.0\" -qqq\n", + "%pip install datasets\n", + "%pip install transformers\n", + "%pip install evaluate\n", + "%pip install seqeval\n", + "%pip install transformers[torch]\n", + "%pip install accelerate -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PhoCuW7y0q4s" + }, + "source": [ + "Let's import the Argilla module for 
reading and writing data:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "bZ5IcIyG0q4t" + }, + "outputs": [], + "source": [ + "import argilla as rg" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IO241Tyq0q4t" + }, + "source": [ + "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "5Olsh_SY0q4t" + }, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# Replace api_key if you configured a custom API key\n", + "# Replace workspace with the name of your workspace\n", + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# import os\n", + "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# # Replace workspace with the name of your workspace\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"owner.apikey\",\n", + "# workspace=\"admin\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LKgEhWk70q4u" + }, + "source": [ + "Finally, let's include the imports we need:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "gW09SGfn0q4u" 
+ }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import random\n", + "import evaluate\n", + "import transformers\n", + "import numpy as np\n", + "import torch\n", + "import pickle\n", + "\n", + "from datasets import load_dataset, ClassLabel, Sequence\n", + "from argilla.metrics.token_classification import top_k_mentions\n", + "from argilla.metrics.token_classification.metrics import Annotations\n", + "from IPython.display import display, HTML\n", + "from sklearn.model_selection import train_test_split\n", + "from transformers import (\n", + " AutoTokenizer,\n", + " AutoModelForTokenClassification,\n", + " TrainingArguments,\n", + " Trainer,\n", + " DataCollatorForTokenClassification,\n", + " pipeline,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n40-m2PXmcM4" + }, + "source": [ + "## 🚀 Exploring our dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6KIAb_u4v8vL" + }, + "source": [ + "First, we will load the train split of our dataset from Hugging Face in order to explore it using ``load_dataset``. 
And, as we can see, it has 119 entries and two columns: one with the sequence of tokens and the other with the sequence of NER tags." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241, + "referenced_widgets": [ + "99e5a771b1c94d74bf5c3e70e15ad09f", + "acf78f08f05043cdb3273d5c92c0df31", + "f0b531f225234f79b4ead2fad8fe2751", + "6f5f5d31a6f749158440b374c0950f64", + "00b9010d8b794a7594813f6894e5abfd", + "367c275138534c858c9313a36e154b10", + "7cb7ee5d7ee245e0a45520ba3a813f0c", + "7dc978907a3b435f98cfe63107929efd", + "a87aec084388404a904b5e214f60eacc", + "194082cd09ec44f2941f3f76134fb96c", + "c65d24e786c34452ad44f04fa87a5131", + "082f49ae0ab64400aedb4a8f88936285", + "df53731b5ee5449d905f0a8e9ed0979f", + "4c695ba86c6c469ea313a0080dea76bd", + "41057edf94144db5933b68ac821ae98a", + "1d2943840f294a57b1e3636555a4e25d", + "0f26cdb761b74c58b5002d3e592df69a", + "34c747c3411d48a5be4d7b858c7f1897", + "01e3c0d891154f3c8a94c9d2b5dd9bc9", + "150aa7a7933b44edb0110f53602fe1a9", + "b9d99330970c43db9a803bad08e87686", + "35b994f5411444a2a6800072017013df", + "a6739364524849378c94c89a72083871", + "9de0c934fe3c48169110a5e9c44cf67a", + "1f1ac3e94d9a482c863fcca0b3ffd976", + "fb746c052b884ddfa5d47ab3e925a790", + "3e15aa279b304b3a81176762a144efee", + "ca226c7464284dd8ac497a021cb2cd79", + "d8e4d43b5dac4b859fd77ab1c63cee7e", + "23e87cd64e544db290cee8a8e491aaa6", + "533d34aac5984fcdac3599d217a19269", + "0e1f107885b446e484e10d7c2bb3148d", + "d831eeae6fce4c1e952d3dddff336c44", + "ac97b8345ac449b6aeb13f3815ba614e", + "34209e5556f045d1a8dcd42799cd5b07", + "0913a4edc88e46e1bcb78a2eef6dbacd", + "a8491ebf827e4829911cdb6f129dbd9b", + "297762bdcf7a40da868faba7ba524f33", + "34eb03a9d5154fc5994a57a432241dbd", + "70ac8fda9db8482aa23d1412f1026303", + "902d902b20c449a4aec44d90c66a44d9", + "5434b0b2eebf4d56a4e1b54875582166", + "4784d47e7f1140e29e97b9232d95e39d", + "9db8cd674fa343c68c4cdea7728bc508", + 
"18d8e0b8ddeb43f098437dbe8de71d63", + "9b1f8961077d4cb895858f069b901d6b", + "6a624ed9e6ed4e2d88b05578d1f44060", + "97f3c530ddea4d2a92fd24a7bd1f36f4", + "5b42770f877d40f9bb9054a642de3b02", + "d19247d43efa4f5395479c0091908812", + "6a31b14c2db641a8881bca8a33b59c54", + "488568ad39a54008bdb9a4b6f09f7952", + "0b3e30198b7e49c581a5f438f23fce59", + "bac5002e328b47e29605b584e8f3d35c", + "70bcb268780e439098031084dff765e2", + "65fe01641cca423facc645cba13f35cd", + "9b067923d08f496cb2bad20ad7ef4697", + "366b9176dec742589ed7b423bf51b492", + "f7cbcbd2d4594c6bae7c1f9aaf5d9106", + "f2644660164f44b38a05172b51459ec1", + "1c1c5e928d4641ac93cec178438487c0", + "219a467d434248128c8e91cec11b7a16", + "fc092df95b51432c9836b3e0ba7234f1", + "130efa006fb44ef5948ee6ee4f66882f", + "1b4b6a31b84641308d77ab04fc85912e", + "3fd2f15677bc4cb9adc119c6158d0ec0", + "59a012d1a99a4aa4b88cc76e83787e56", + "203df8ab6cc948c69ed8469eac9de357", + "3243d051548c4996b10c970f582fe9f7", + "4ded40486ee64bdb874906972b9deec8", + "ffdadb62b0d74b1ea816cbb1bb924d2e", + "8412fa5c43c44b02ae2693e62705385c", + "9b7068fd51bb463e88e65803dbbbbade", + "a3d8351283934fdcb6c57470a12df1a1", + "4831180eb2214af081dfc1469f6c4021", + "63701b6fe5d34289bb9bb55da3aa8c49", + "00d55e9d9f08467e81a5b77fdaa35c07" + ] }, + "id": "s_e7D8paZFDq", + "outputId": "cbe1688f-b354-4205-aabd-edfa5dac9374" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "gs0Vhje60q4p" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "99e5a771b1c94d74bf5c3e70e15ad09f", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "## Introduction\n", - "\n", - "Our goal is to show from a trainig dataset how to fine-tune a **tiny BERT model** in order to identify **NER** tags.\n", - "\n", - "For this purpose, we will first connect to Argilla and log our [dataset](https://huggingface.co/datasets/argilla/spacy_sm_wnut17), so that we can analyse it in a more visual way.\n", - "\n", - ">💡 **Tip:** If you want to try with a 
different dataset than the one in this tutorial, but it's not yet annotated, Argilla has several tutorials on how to do it [manually](/practical_guides/annotate_dataset.html) or [automatically](https://docs.v1.argilla.io/en/latest/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.html#Appendix:-Log-datasets-to-the-Hugging-Face-Hub).\n", - "\n", - "\n", - "Next, we will preprocess our dataset and fine-tune the model. Here we will be using [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert), to make it easier to understand it and start *playing* with the parameters easily. However, there are still plenty of similar ones to [discover](https://huggingface.co/docs/transformers/index#bigtable).\n", - "\n", - "✨Let's get started!" + "text/plain": [ + "Downloading readme: 0%| | 0.00/1.29k [00:00\n", - " \n", - " \"deploy\n", - " \n", - "

\n", - "\n", - "2. [Launch Argilla using Argilla's quickstart Docker image](../../getting_started/quickstart.ipynb): This is the recommended option if you want Argilla running on your local machine. Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", - "\n", - "For more information on deployment options, please check the Deployment section of the documentation.\n", - "\n", - "
\n", - "\n", - ">🤯 **Tip**\n", - "\n", - "> This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", - "
\n" + "text/plain": [ + "Downloading data files: 0%| | 0/2 [00:00=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", - "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", - " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n", - "Collecting xxhash (from datasets)\n", - " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting multiprocess (from datasets)\n", - " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n", - "Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)\n", - " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 
kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.7.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n", - 
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", - "Installing collected packages: xxhash, dill, multiprocess, huggingface-hub, datasets\n", - "Successfully installed datasets-2.14.4 dill-0.3.7 huggingface-hub-0.16.4 multiprocess-0.70.15 xxhash-3.3.0\n", - "Collecting transformers\n", - " Downloading transformers-4.33.0-py3-none-any.whl (7.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", - "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n", - " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m48.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n", - " Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers) (2023.6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers) (4.7.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n", - "Installing collected packages: tokenizers, safetensors, transformers\n", - "Successfully installed safetensors-0.3.3 tokenizers-0.13.3 transformers-4.33.0\n", - "Collecting evaluate\n", - " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: datasets>=2.0.0 in 
/usr/local/lib/python3.10/dist-packages (from evaluate) (2.14.4)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.23.5)\n", - "Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.3.7)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.5.3)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from evaluate) (4.66.1)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from evaluate) (3.3.0)\n", - "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.70.15)\n", - "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2023.6.0)\n", - "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.16.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from evaluate) (23.1)\n", - "Collecting responses<0.19 (from evaluate)\n", - " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (9.0.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (3.8.5)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (6.0.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (3.12.2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in 
/usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.7.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2023.7.22)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2023.3)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->evaluate) (1.16.0)\n", - "Installing collected 
packages: responses, evaluate\n", - "Successfully installed evaluate-0.4.0 responses-0.18.0\n", - "Collecting seqeval\n", - " Downloading seqeval-1.2.2.tar.gz (43 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.23.5)\n", - "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n", - "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.10.1)\n", - "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.3.2)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.2.0)\n", - "Building wheels for collected packages: seqeval\n", - " Building wheel for seqeval (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=a3e4deed0ae4f82793ec07d332ea0faca9b72401ac85aa8047235d5fec9ef8ce\n", - " Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n", - "Successfully built seqeval\n", - "Installing collected packages: seqeval\n", - "Successfully installed seqeval-1.2.2\n", - "Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.33.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.12.2)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.16.4)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.23.5)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2023.6.3)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.31.0)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.13.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.3.3)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.66.1)\n", - "Requirement already satisfied: torch!=1.12.0,>=1.10 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) 
(2.0.1+cu118)\n", - "Collecting accelerate>=0.20.3 (from transformers[torch])\n", - " Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[torch]) (5.9.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers[torch]) (2023.6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers[torch]) (4.7.1)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (1.12)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[torch]) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.10->transformers[torch]) (3.27.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.10->transformers[torch]) (16.0.6)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.4)\n", - 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2023.7.22)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.12.0,>=1.10->transformers[torch]) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.12.0,>=1.10->transformers[torch]) (1.3.0)\n", - "Installing collected packages: accelerate\n", - "Successfully installed accelerate-0.22.0\n", - "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.22.0)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.23.5)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.1)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", - "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.0.1+cu118)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.12.2)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.7.1)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1)\n", - "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (3.27.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (16.0.6)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n" - ] - } - ], - "source": [ - "%pip install \"argilla[server]==1.5.0\" -qqq\n", - "%pip install datasets\n", - "%pip install transformers\n", - "%pip install evaluate\n", - "%pip install seqeval\n", - "%pip install transformers[torch]\n", - "%pip install accelerate -U" + "text/plain": [ + "Downloading data: 0%| | 0.00/5.03k [00:00 ⚠️ **Be careful:** Each execution will upload and add your annotations again without being overwritten." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "id": "K7WFV0vaHhyd" + }, + "outputs": [], + "source": [ + "# Create a function to read the sequences\n", + "def parse_entities(record):\n", + " current_entity = None # to check if current entity in process\n", + " current_info = [] # to save the information used in the tuple for the whole sentence\n", + " char_position = 0\n", + " entities = [] # final list to save the tuples\n", + "\n", + " # Iterate over the tokens and ner tags\n", + " for i in range(len(record[\"ner_tags\"])):\n", + " token = record[\"tokens\"][i]\n", + " ner_tag = dataset.features[\"ner_tags\"].feature.names[record[\"ner_tags\"][i]]\n", + "\n", + " if ner_tag.startswith(\"B-\"):\n", + " if current_entity:\n", + " current_info.append(current_entity)\n", + " current_entity = {\"word\": token, \"start\": char_position, \"tag\": ner_tag[2:]}\n", + " char_position += len(token) + 1\n", + "\n", + " elif ner_tag.startswith(\"I-\"):\n", + " if current_entity:\n", + " current_entity[\"word\"] += \" \" + token\n", + " char_position += len(token) + 1\n", + "\n", + " elif ner_tag == \"O\":\n", + " char_position += len(token) + 1\n", + "\n", + " # Add the last entity if it exists\n", + " if current_entity:\n", + " current_info.append(current_entity)\n", + "\n", + " # Calculate the end positions for each entity\n", + " for entity in current_info:\n", + " entity[\"end\"] = entity[\"start\"] + len(entity[\"word\"])\n", + "\n", + " for entity in current_info:\n", + " entities.append((entity[\"tag\"], entity[\"start\"], entity[\"end\"]))\n", + "\n", + " return entities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WsKBFFAiYv3D" + }, + "outputs": [], + "source": [ + "# Write a loop to iterate over each row of your dataset and add the text, tokens, and tuple\n", + "records = [\n", + " rg.TokenClassificationRecord(\n", + " text=\" \".join(row[\"tokens\"]),\n", + " tokens=row[\"tokens\"],\n", + 
" annotation=parse_entities(row),\n", + " )\n", + " for row in dataset\n", + "]\n", + "\n", + "# Log the records with the name of your choice\n", + "rg.log(records, \"spacy_sm_wnut17\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3Ydx5se82s_n" + }, + "source": [ + "So now you will be able to check your annotations in a much more visual way and even edit them if necessary.\n", + "\n", + "\"argilla-annotations\"\n",\n", + "\n", + "In addition, **Argilla** also has more options, e.g. to extract [metrics](https://docs.v1.argilla.io/en/latest/reference/python/python_metrics.html) such as the one shown below.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 }, + "id": "uIg23A-uRX94", + "outputId": "0a9fed8e-6ae8-4a96-c40f-420f61f55c73" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Select the dataset from Argilla and visualize the data\n", + "top_k_mentions(\n", + " name=\"spacy_sm_wnut17\", k=30, threshold=2, compute_for=Annotations\n", + ").visualize()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ypCRJhX35ltp" + }, + "source": [ + "## ⏳ Preprocessing the data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ffpxCD1M8mFO" + }, + "source": [ + "Next, we will **pre-process our data** in the required format so that the model can work with it. In our case, we will reload them from HuggingFace, as in Argilla we only loaded the train set, however, this is also possible.\n", + "\n", + "The following code would allow us to prepare our data using Argilla, this is especially useful for manual annotations, as it adds **B-** (beggining) or **I-** (inside) to our NER tags automatically depending on their position.\n", + "\n", + "```python\n", + "dataset = rg.load(\"dataset_name\").prepare_for_training()\n", + "\n", + "dataset = dataset.train_test_split()\n", + "```\n", + "\n", + "> 🤯 **Tip:** In our case, we are working with a very small dataset that is divided into train and test. However, you may are using another dataset that already have the ``validation`` partition, or even if it is larger, you can create this partition yourself with the following code:\n", + "\n", + "```python\n", + "dataset['train'], dataset['validation'] = dataset['train'].train_test_split(.1).values()\n", + "```\n", + "So, let's continue!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 240, + "referenced_widgets": [ + "368c173b5ae942aca2136dfe7968e527", + "b27a1ac0f80443a18f51002768ef5883", + "ba728a77e57243e4b6a58c09d1d68c19", + "84cf7979444c46409f547fc34fe5959f", + "25731b7b5adf4f62953468b72d4dc746", + "c7c4843952414605b4a3a526ff32ad4f", + "215f5ed3a7ab450d9bfc87d0e92c306d", + "a0d5d3e4e2b24345aa002b6fdcd961a1", + "3a2ce5219b5349be87932029056fb270", + "eda3a7954d6a49418962197d7b120072", + "3732ff34dde142e1a934ef62b4f2f4f9" + ] }, + "id": "pl1MwBhxkQEm", + "outputId": "7296bdd5-652d-477b-9696-cee344341902" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Set the HF_TOKEN environment variable\n", - "# import os\n", - "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", - "\n", - "# # Replace api_url with the url to your HF Spaces URL\n", - "# # Replace api_key if you configured a custom API key\n", - "# # Replace workspace with the name of your workspace\n", - "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", - "# api_key=\"owner.apikey\",\n", - "# workspace=\"admin\",\n", - "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", - "# )" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:datasets.builder:Found cached dataset parquet (/root/.cache/huggingface/datasets/argilla___parquet/argilla--spacy_sm_wnut17-1babd564207f27f8/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "LKgEhWk70q4u" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "368c173b5ae942aca2136dfe7968e527", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "Finally, let's include the imports we need:" + "text/plain": [ + " 0%| | 0/2 [00:00 💡 **Tip:** If you 
are using HuggingFace it may be easier for you to save your model there directly. To do so, use the following code and add the following parameters to TrainingArguments.\n", + "\n", + "```python\n", + "from huggingface_hub import notebook_login\n", + "notebook_login()\n", + "\n", + "# Add the following parameter\n", + "training_args = TrainingArguments(\n", + " save_strategy=\"epoch\",\n", + " load_best_model_at_end=True,\n", + " push_to_hub=True,\n", + ")\n", + "```\n", + "> 🕹️ **Let's play:** What is the best accuracy you can get?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O9lz1LbVN3OW" + }, + "outputs": [], + "source": [ + "training_args = TrainingArguments(\n", + " output_dir=\"ner-recognition\",\n", + " learning_rate=2e-4,\n", + " per_device_train_batch_size=32,\n", + " per_device_eval_batch_size=32,\n", + " num_train_epochs=20,\n", + " weight_decay=0.05,\n", + " evaluation_strategy=\"epoch\",\n", + " optim=\"adamw_torch\",\n", + " logging_steps=50,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7SFjM6KGN3s3" + }, + "source": [ + "### Metrics\n", + "\n", + "To know how our training has gone, of course, we must use metrics. Therefore, we will use ``Seqeval`` and a function that computes precision, recall, F1 and accuracy from the actual and predicted tags." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "93d7c54a35494535a500550d73d22acb", + "ea8acd336a384c8793651542d6bb5167", + "9867f7c33e24416faf662ec5366af2f1", + "e023fa533e1a4059b8087be03f81ac9a", + "de682ca9943648579f74c077dec8bcfc", + "75db171714734f49b05273ba2d42be11", + "1ced5374042343c48b7523e0db3b05ec", + "9e837cd94af54d01a4ba079f16b881bf", + "3a0a83c63061434887f2b9760fdf5281", + "a309e1df709042d2a3e31df5f3426367", + "fe27af60738749bda14625b756362aef" + ] }, + "id": "Jr559HdeN6Q9", + "outputId": "dac3b0d0-173a-4b70-a2cf-d4306eecfd6e" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 241, - "referenced_widgets": [ - "99e5a771b1c94d74bf5c3e70e15ad09f", - "acf78f08f05043cdb3273d5c92c0df31", - "f0b531f225234f79b4ead2fad8fe2751", - "6f5f5d31a6f749158440b374c0950f64", - "00b9010d8b794a7594813f6894e5abfd", - "367c275138534c858c9313a36e154b10", - "7cb7ee5d7ee245e0a45520ba3a813f0c", - "7dc978907a3b435f98cfe63107929efd", - "a87aec084388404a904b5e214f60eacc", - "194082cd09ec44f2941f3f76134fb96c", - "c65d24e786c34452ad44f04fa87a5131", - "082f49ae0ab64400aedb4a8f88936285", - "df53731b5ee5449d905f0a8e9ed0979f", - "4c695ba86c6c469ea313a0080dea76bd", - "41057edf94144db5933b68ac821ae98a", - "1d2943840f294a57b1e3636555a4e25d", - "0f26cdb761b74c58b5002d3e592df69a", - "34c747c3411d48a5be4d7b858c7f1897", - "01e3c0d891154f3c8a94c9d2b5dd9bc9", - "150aa7a7933b44edb0110f53602fe1a9", - "b9d99330970c43db9a803bad08e87686", - "35b994f5411444a2a6800072017013df", - "a6739364524849378c94c89a72083871", - "9de0c934fe3c48169110a5e9c44cf67a", - "1f1ac3e94d9a482c863fcca0b3ffd976", - "fb746c052b884ddfa5d47ab3e925a790", - "3e15aa279b304b3a81176762a144efee", - "ca226c7464284dd8ac497a021cb2cd79", - "d8e4d43b5dac4b859fd77ab1c63cee7e", - "23e87cd64e544db290cee8a8e491aaa6", - 
"533d34aac5984fcdac3599d217a19269", - "0e1f107885b446e484e10d7c2bb3148d", - "d831eeae6fce4c1e952d3dddff336c44", - "ac97b8345ac449b6aeb13f3815ba614e", - "34209e5556f045d1a8dcd42799cd5b07", - "0913a4edc88e46e1bcb78a2eef6dbacd", - "a8491ebf827e4829911cdb6f129dbd9b", - "297762bdcf7a40da868faba7ba524f33", - "34eb03a9d5154fc5994a57a432241dbd", - "70ac8fda9db8482aa23d1412f1026303", - "902d902b20c449a4aec44d90c66a44d9", - "5434b0b2eebf4d56a4e1b54875582166", - "4784d47e7f1140e29e97b9232d95e39d", - "9db8cd674fa343c68c4cdea7728bc508", - "18d8e0b8ddeb43f098437dbe8de71d63", - "9b1f8961077d4cb895858f069b901d6b", - "6a624ed9e6ed4e2d88b05578d1f44060", - "97f3c530ddea4d2a92fd24a7bd1f36f4", - "5b42770f877d40f9bb9054a642de3b02", - "d19247d43efa4f5395479c0091908812", - "6a31b14c2db641a8881bca8a33b59c54", - "488568ad39a54008bdb9a4b6f09f7952", - "0b3e30198b7e49c581a5f438f23fce59", - "bac5002e328b47e29605b584e8f3d35c", - "70bcb268780e439098031084dff765e2", - "65fe01641cca423facc645cba13f35cd", - "9b067923d08f496cb2bad20ad7ef4697", - "366b9176dec742589ed7b423bf51b492", - "f7cbcbd2d4594c6bae7c1f9aaf5d9106", - "f2644660164f44b38a05172b51459ec1", - "1c1c5e928d4641ac93cec178438487c0", - "219a467d434248128c8e91cec11b7a16", - "fc092df95b51432c9836b3e0ba7234f1", - "130efa006fb44ef5948ee6ee4f66882f", - "1b4b6a31b84641308d77ab04fc85912e", - "3fd2f15677bc4cb9adc119c6158d0ec0", - "59a012d1a99a4aa4b88cc76e83787e56", - "203df8ab6cc948c69ed8469eac9de357", - "3243d051548c4996b10c970f582fe9f7", - "4ded40486ee64bdb874906972b9deec8", - "ffdadb62b0d74b1ea816cbb1bb924d2e", - "8412fa5c43c44b02ae2693e62705385c", - "9b7068fd51bb463e88e65803dbbbbade", - "a3d8351283934fdcb6c57470a12df1a1", - "4831180eb2214af081dfc1469f6c4021", - "63701b6fe5d34289bb9bb55da3aa8c49", - "00d55e9d9f08467e81a5b77fdaa35c07" - ] - }, - "id": "s_e7D8paZFDq", - "outputId": "cbe1688f-b354-4205-aabd-edfa5dac9374" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "93d7c54a35494535a500550d73d22acb", + "version_major": 
2, + "version_minor": 0 }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "99e5a771b1c94d74bf5c3e70e15ad09f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading readme: 0%| | 0.00/1.29k [00:00\n", + " \n", + " \n", + " [80/80 00:12, Epoch 20/20]\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
1No log1.4458350.0000000.0000000.0000000.720751
2No log1.5403810.0000000.0000000.0000000.720751
3No log1.3009410.0000000.0000000.0000000.720751
4No log1.2591190.0000000.0000000.0000000.720751
5No log1.2565420.4444440.0254780.0481930.720751
6No log1.1540500.2027030.0955410.1298700.736203
7No log1.3884630.2545450.0891720.1320750.718543
8No log1.2462350.2753620.1210190.1681420.737307
9No log1.2547870.2020200.1273890.1562500.731788
10No log1.3885490.2727270.1719750.2109380.735099
11No log1.4946270.2976190.1592360.2074690.740618
12No log1.3313030.2325580.1910830.2097900.746137
130.6753001.4731910.2522520.1783440.2089550.748344
140.6753001.5667830.2755100.1719750.2117650.742826
150.6753001.5001710.2523360.1719750.2045450.739514
160.6753001.5419460.2743360.1974520.2296300.742826
170.6753001.5463470.2583330.1974520.2238270.745033
180.6753001.5341000.2711860.2038220.2327270.743929
190.6753001.5350950.2773110.2101910.2391300.745033
200.6753001.5393030.2773110.2101910.2391300.745033

" ], - "source": [ - "dataset" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "ITgB-mzZz3vX" - }, - "source": [ - "Next, we will use the following code taking advantage of the ``DatasetDict`` option ``Features`` to convert it to the format required by Argilla in order to log it.\n", - "\n", - "The three elements that our data must have for Token Classifications are the following:\n", - "\n", - "* **text**: the complete string.\n", - "* **tokens**: the sequence of tokens.\n", - "* **annotation**: a tuple formed by the tag, the start position and the end position.\n", - "\n", - "> ⚠️ **Be careful:** Each execution will upload and add your annotations again without being overwritten." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 due to no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] }, { - "cell_type": "code", - "execution_count": 79, - "metadata": { - "id": "K7WFV0vaHhyd" - }, - "outputs": [], - "source": [ - "# Create a function to read the sequences\n", - "def parse_entities(record):\n", - " current_entity = None # to check if current entity in process\n", - " current_info = [] # to save the information used in the tuple for the whole sentence\n", - " char_position = 0\n", - " entities = [] # final list to save the tuples\n", - "\n", - " # Iterate over the tokens and ner tags\n", - " for i in range(len(record[\"ner_tags\"])):\n", - " token = record[\"tokens\"][i]\n", - " ner_tag = dataset.features[\"ner_tags\"].feature.names[record[\"ner_tags\"][i]]\n", - "\n", - " if ner_tag.startswith(\"B-\"):\n", - " if current_entity:\n", - " current_info.append(current_entity)\n", - " current_entity = {\"word\": token, \"start\": char_position, \"tag\": ner_tag[2:]}\n", - " char_position += len(token) + 1\n", - "\n", - " elif ner_tag.startswith(\"I-\"):\n", - " if current_entity:\n", - " current_entity[\"word\"] += \" \" + token\n", - " char_position += len(token) + 1\n", - "\n", - " elif ner_tag == \"O\":\n", - " char_position += len(token) + 1\n", - "\n", - " # Add the last entity if it exists\n", - " if current_entity:\n", - " current_info.append(current_entity)\n", - "\n", - " # Calculate the end positions for each entity\n", - " for entity in current_info:\n", - " entity[\"end\"] = entity[\"start\"] + len(entity[\"word\"])\n", - "\n", - " for entity in current_info:\n", - " entities.append((entity[\"tag\"], entity[\"start\"], entity[\"end\"]))\n", - "\n", - " return entities" + "data": { + "text/plain": [ + "TrainOutput(global_step=80, training_loss=0.45428856909275056, metrics={'train_runtime': 14.9864, 'train_samples_per_second': 158.811, 'train_steps_per_second': 5.338, 'total_flos': 32769159790410.0, 'train_loss': 
0.45428856909275056, 'epoch': 20.0})" ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=tokenized_dataset[\"train\"],\n", + " eval_dataset=tokenized_dataset[\"test\"],\n", + " tokenizer=tokenizer,\n", + " data_collator=data_collator,\n", + " compute_metrics=compute_metrics,\n", + ")\n", + "\n", + "# Train.\n", + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZCCiyS9llFq6" + }, + "source": [ + "The `evaluate` method will allow you to evaluate again on the validation set or on another dataset (e.g. if you have train, validation and test)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 193 }, + "id": "tsX1Z9r_PjT1", + "outputId": "daf2665e-5bb7-4948-8267-788b503c07e9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WsKBFFAiYv3D" - }, - "outputs": [], - "source": [ - "# Write a loop to iterate over each row of your dataset and add the text, tokens, and tuple\n", - "records = [\n", - " rg.TokenClassificationRecord(\n", - " text=\" \".join(row[\"tokens\"]),\n", - " tokens=row[\"tokens\"],\n", - " annotation=parse_entities(row),\n", - " )\n", - " for row in dataset\n", - "]\n", - "\n", - "# Log the records with the name of your choice\n", - "rg.log(records, \"spacy_sm_wnut17\")" + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [1/1 : < :]\n", + "
\n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "3Ydx5se82s_n" - }, - "source": [ - "So now you will be able to check your annotations in a much more visual way and even edit them if necessary.\n", - "\n", - "\"argilla-annotations\"\n",\n", - "\n", - "In addition, **Argilla** also has more options, e.g. to extract [metrics](https://docs.v1.argilla.io/en/latest/reference/python/python_metrics.html) such as the one shown below.\n", - "\n", - "\n" + "data": { + "text/plain": [ + "{'eval_loss': 1.5393034219741821,\n", + " 'eval_precision': 0.2773109243697479,\n", + " 'eval_recall': 0.21019108280254778,\n", + " 'eval_f1': 0.2391304347826087,\n", + " 'eval_accuracy': 0.7450331125827815,\n", + " 'eval_runtime': 0.0918,\n", + " 'eval_samples_per_second': 326.934,\n", + " 'eval_steps_per_second': 10.898,\n", + " 'epoch': 20.0}" ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer.evaluate()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0NYZJgRORyjm" + }, + "source": [ + "> 🔮 **Try to predict**\n", + "\n", + "> When you have created your model and are happy with it, test it yourself with your own text.\n", + "\n", + "```python\n", + "# Replace this with the directory where it was saved\n", + "model_checkpoint = \"your-path\"\n", + "token_classifier = pipeline(\"token-classification\", model=model_checkpoint, aggregation_strategy=\"simple\")\n", + "token_classifier(\"I heard Madrid is wonderful in spring.\")\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4IDnpIN5ZE6u" + }, + "source": [ + "## 📝✔️ Summary\n", + "\n", + "In this tutorial, we have learned how to upload our training dataset to Argilla in order to visualise the data it contains and the NER tags it uses and how to fine-tune a BERT model for NER recognition using ``transformers``. 
This can be very useful to learn the basics of BERT pre-models and, from there, to develop your skills further and try out different ones that may give better results.\n", + "\n", + "💪Cheers!" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00b9010d8b794a7594813f6894e5abfd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 542 - }, - "id": "uIg23A-uRX94", - "outputId": "0a9fed8e-6ae8-4a96-c40f-420f61f55c73" - }, - "outputs": [ - { - "data": { - 
"text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "00d55e9d9f08467e81a5b77fdaa35c07": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "011e772f270e4e3b819fc093928b36b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_76ac206691ad4795b168534e69ed5a56", + "IPY_MODEL_ac140d48d0984ca59e5e4fc437eff946", + "IPY_MODEL_527332d8360b4df293452d0fcced938d" ], - "source": [ - "# Select the dataset from Argilla and visualize the data\n", - "top_k_mentions(\n", - " name=\"spacy_sm_wnut17\", k=30, threshold=2, compute_for=Annotations\n", - ").visualize()" - ] + "layout": "IPY_MODEL_25b47d1f214340baa662328b090f2b6a" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "ypCRJhX35ltp" - }, - "source": [ - "## ⏳ Preprocessing the data" - ] + "01e3c0d891154f3c8a94c9d2b5dd9bc9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": 
null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "ffpxCD1M8mFO" - }, - "source": [ - "Next, we will **pre-process our data** in the required format so that the model can work with it. In our case, we will reload them from HuggingFace, as in Argilla we only loaded the train set, however, this is also possible.\n", - "\n", - "The following code would allow us to prepare our data using Argilla, this is especially useful for manual annotations, as it adds **B-** (beggining) or **I-** (inside) to our NER tags automatically depending on their position.\n", - "\n", - "```python\n", - "dataset = rg.load(\"dataset_name\").prepare_for_training()\n", - "\n", - "dataset = dataset.train_test_split()\n", - "```\n", - "\n", - "> 🤯 **Tip:** In our case, we are working with a very small dataset that is divided into train and test. However, you may are using another dataset that already have the ``validation`` partition, or even if it is larger, you can create this partition yourself with the following code:\n", - "\n", - "```python\n", - "dataset['train'], dataset['validation'] = dataset['train'].train_test_split(.1).values()\n", - "```\n", - "So, let's continue!" 
- ] + "082f49ae0ab64400aedb4a8f88936285": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_df53731b5ee5449d905f0a8e9ed0979f", + "IPY_MODEL_4c695ba86c6c469ea313a0080dea76bd", + "IPY_MODEL_41057edf94144db5933b68ac821ae98a" + ], + "layout": "IPY_MODEL_1d2943840f294a57b1e3636555a4e25d" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 240, - "referenced_widgets": [ - "368c173b5ae942aca2136dfe7968e527", - "b27a1ac0f80443a18f51002768ef5883", - "ba728a77e57243e4b6a58c09d1d68c19", - "84cf7979444c46409f547fc34fe5959f", - "25731b7b5adf4f62953468b72d4dc746", - "c7c4843952414605b4a3a526ff32ad4f", - "215f5ed3a7ab450d9bfc87d0e92c306d", - "a0d5d3e4e2b24345aa002b6fdcd961a1", - "3a2ce5219b5349be87932029056fb270", - "eda3a7954d6a49418962197d7b120072", - "3732ff34dde142e1a934ef62b4f2f4f9" - ] - }, - "id": "pl1MwBhxkQEm", - "outputId": "7296bdd5-652d-477b-9696-cee344341902" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:datasets.builder:Found cached dataset parquet (/root/.cache/huggingface/datasets/argilla___parquet/argilla--spacy_sm_wnut17-1babd564207f27f8/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "368c173b5ae942aca2136dfe7968e527", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/2 [00:00 💡 **Tip:** If you are using HuggingFace it may be easier for you to save your model there directly. 
To do so, use the following code and add the following parameters to TrainingArguments.\n", - "\n", - "```python\n", - "from huggingface_hub import notebook_login\n", - "notebook_login()\n", - "\n", - "# Add the following parameter\n", - "training_args = TrainingArguments(\n", - " save_strategy=\"epoch\",\n", - " load_best_model_at_end=True,\n", - " push_to_hub=True,\n", - ")\n", - "```\n", - "> 🕹️ **Let's play:** What is the best accuracy you can get?" - ] + "6a31b14c2db641a8881bca8a33b59c54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "O9lz1LbVN3OW" - }, - "outputs": [], - "source": [ - "training_args = TrainingArguments(\n", - " output_dir=\"ner-recognition\",\n", - " learning_rate=2e-4,\n", - " per_device_train_batch_size=32,\n", - " per_device_eval_batch_size=32,\n", - " num_train_epochs=20,\n", - " weight_decay=0.05,\n", - " evaluation_strategy=\"epoch\",\n", - " optim=\"adamw_torch\",\n", - " logging_steps = 50\n", - ")" - ] + "6a624ed9e6ed4e2d88b05578d1f44060": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_488568ad39a54008bdb9a4b6f09f7952", 
+ "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0b3e30198b7e49c581a5f438f23fce59", + "value": 2 + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "7SFjM6KGN3s3" - }, - "source": [ - "### Metrics\n", - "\n", - "To know how our training has gone, of course, we must use metrics. Therefore, we will use ``Seqeval`` and a function that computes precision, recall, F1 and accuracy from the actual and predicted tags." - ] + "6f5f5d31a6f749158440b374c0950f64": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_194082cd09ec44f2941f3f76134fb96c", + "placeholder": "​", + "style": "IPY_MODEL_c65d24e786c34452ad44f04fa87a5131", + "value": " 1.29k/1.29k [00:00<00:00, 44.2kB/s]" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "93d7c54a35494535a500550d73d22acb", - "ea8acd336a384c8793651542d6bb5167", - "9867f7c33e24416faf662ec5366af2f1", - "e023fa533e1a4059b8087be03f81ac9a", - "de682ca9943648579f74c077dec8bcfc", - "75db171714734f49b05273ba2d42be11", - "1ced5374042343c48b7523e0db3b05ec", - "9e837cd94af54d01a4ba079f16b881bf", - "3a0a83c63061434887f2b9760fdf5281", - "a309e1df709042d2a3e31df5f3426367", - "fe27af60738749bda14625b756362aef" - ] - }, - "id": "Jr559HdeN6Q9", - "outputId": "dac3b0d0-173a-4b70-a2cf-d4306eecfd6e" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "93d7c54a35494535a500550d73d22acb", - "version_major": 2, - "version_minor": 0 - }, - 
"text/plain": [ - "Downloading builder script: 0%| | 0.00/6.34k [00:00\n", - " \n", - " \n", - " [80/80 00:12, Epoch 20/20]\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
1No log1.4458350.0000000.0000000.0000000.720751
2No log1.5403810.0000000.0000000.0000000.720751
3No log1.3009410.0000000.0000000.0000000.720751
4No log1.2591190.0000000.0000000.0000000.720751
5No log1.2565420.4444440.0254780.0481930.720751
6No log1.1540500.2027030.0955410.1298700.736203
7No log1.3884630.2545450.0891720.1320750.718543
8No log1.2462350.2753620.1210190.1681420.737307
9No log1.2547870.2020200.1273890.1562500.731788
10No log1.3885490.2727270.1719750.2109380.735099
11No log1.4946270.2976190.1592360.2074690.740618
12No log1.3313030.2325580.1910830.2097900.746137
130.6753001.4731910.2522520.1783440.2089550.748344
140.6753001.5667830.2755100.1719750.2117650.742826
150.6753001.5001710.2523360.1719750.2045450.739514
160.6753001.5419460.2743360.1974520.2296300.742826
170.6753001.5463470.2583330.1974520.2238270.745033
180.6753001.5341000.2711860.2038220.2327270.743929
190.6753001.5350950.2773110.2101910.2391300.745033
200.6753001.5393030.2773110.2101910.2391300.745033

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=80, training_loss=0.45428856909275056, metrics={'train_runtime': 14.9864, 'train_samples_per_second': 158.811, 'train_steps_per_second': 5.338, 'total_flos': 32769159790410.0, 'train_loss': 0.45428856909275056, 'epoch': 20.0})" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } + "9867f7c33e24416faf662ec5366af2f1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9e837cd94af54d01a4ba079f16b881bf", + "max": 6338, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3a0a83c63061434887f2b9760fdf5281", + "value": 6338 + } + }, + "99e5a771b1c94d74bf5c3e70e15ad09f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_acf78f08f05043cdb3273d5c92c0df31", + "IPY_MODEL_f0b531f225234f79b4ead2fad8fe2751", + "IPY_MODEL_6f5f5d31a6f749158440b374c0950f64" ], - "source": [ - "trainer = Trainer(\n", - " model=model,\n", - " args=training_args,\n", - " train_dataset=tokenized_dataset[\"train\"],\n", - " eval_dataset=tokenized_dataset[\"test\"],\n", - " tokenizer=tokenizer,\n", - " data_collator=data_collator,\n", - " compute_metrics=compute_metrics,\n", - ")\n", - "\n", - "# Train.\n", - "trainer.train()" - ] + "layout": "IPY_MODEL_00b9010d8b794a7594813f6894e5abfd" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZCCiyS9llFq6" - }, - "source": [ - "The `evaluate` method will allow you to evaluate again on the validation set or on another dataset (e.g. if you have train, validation and test)." 
- ] + "9b067923d08f496cb2bad20ad7ef4697": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1c1c5e928d4641ac93cec178438487c0", + "placeholder": "​", + "style": "IPY_MODEL_219a467d434248128c8e91cec11b7a16", + "value": "Generating train split: 100%" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 193 - }, - "id": "tsX1Z9r_PjT1", - "outputId": "daf2665e-5bb7-4948-8267-788b503c07e9" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "

\n", - " \n", - " \n", - " [1/1 : < :]\n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "{'eval_loss': 1.5393034219741821,\n", - " 'eval_precision': 0.2773109243697479,\n", - " 'eval_recall': 0.21019108280254778,\n", - " 'eval_f1': 0.2391304347826087,\n", - " 'eval_accuracy': 0.7450331125827815,\n", - " 'eval_runtime': 0.0918,\n", - " 'eval_samples_per_second': 326.934,\n", - " 'eval_steps_per_second': 10.898,\n", - " 'epoch': 20.0}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } + "9b1f8961077d4cb895858f069b901d6b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d19247d43efa4f5395479c0091908812", + "placeholder": "​", + "style": "IPY_MODEL_6a31b14c2db641a8881bca8a33b59c54", + "value": "Extracting data files: 100%" + } + }, + "9b7068fd51bb463e88e65803dbbbbade": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9db8cd674fa343c68c4cdea7728bc508": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9de0c934fe3c48169110a5e9c44cf67a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ca226c7464284dd8ac497a021cb2cd79", + "placeholder": "​", + "style": "IPY_MODEL_d8e4d43b5dac4b859fd77ab1c63cee7e", + "value": "Downloading data: 100%" + } + }, + "9e837cd94af54d01a4ba079f16b881bf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": 
null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a0d5d3e4e2b24345aa002b6fdcd961a1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a309e1df709042d2a3e31df5f3426367": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a3d8351283934fdcb6c57470a12df1a1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6739364524849378c94c89a72083871": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9de0c934fe3c48169110a5e9c44cf67a", + "IPY_MODEL_1f1ac3e94d9a482c863fcca0b3ffd976", + "IPY_MODEL_fb746c052b884ddfa5d47ab3e925a790" ], - "source": [ - "trainer.evaluate()" - ] + "layout": "IPY_MODEL_3e15aa279b304b3a81176762a144efee" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "0NYZJgRORyjm" - }, - "source": [ - "> 🔮 **Try to predict**\n", - "\n", - "> When you have created your model and are happy with it, test it yourself with your own text.\n", - "\n", - "```python\n", - "# Replace this with the directory where it was saved\n", - "model_checkpoint = \"your-path\"\n", - "token_classifier = pipeline(\"token-classification\", model=model_checkpoint, aggregation_strategy=\"simple\")\n", - "token_classifier(\"I heard Madrid is wonderful in spring.\")\n", - "\n", - "```" - ] + "a8491ebf827e4829911cdb6f129dbd9b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4784d47e7f1140e29e97b9232d95e39d", + "placeholder": "​", + "style": "IPY_MODEL_9db8cd674fa343c68c4cdea7728bc508", + "value": " 5.03k/5.03k [00:00<00:00, 21.5kB/s]" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "4IDnpIN5ZE6u" - }, - "source": [ - "## 📝✔️ Summary\n", - "\n", - "In this tutorial, we have learned how to upload our training dataset to 
Argilla in order to visualise the data it contains and the NER tags it uses and how to fine-tune a BERT model for NER recognition using ``transformers``. This can be very useful to learn the basics of BERT pre-models and, from there, to develop your skills further and try out different ones that may give better results.\n", - "\n", - "💪Cheers!" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "00b9010d8b794a7594813f6894e5abfd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "00d55e9d9f08467e81a5b77fdaa35c07": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "011e772f270e4e3b819fc093928b36b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_76ac206691ad4795b168534e69ed5a56", - "IPY_MODEL_ac140d48d0984ca59e5e4fc437eff946", - "IPY_MODEL_527332d8360b4df293452d0fcced938d" - ], - "layout": "IPY_MODEL_25b47d1f214340baa662328b090f2b6a" - } - }, - "01e3c0d891154f3c8a94c9d2b5dd9bc9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - 
"max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "082f49ae0ab64400aedb4a8f88936285": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_df53731b5ee5449d905f0a8e9ed0979f", - "IPY_MODEL_4c695ba86c6c469ea313a0080dea76bd", - "IPY_MODEL_41057edf94144db5933b68ac821ae98a" - ], - "layout": "IPY_MODEL_1d2943840f294a57b1e3636555a4e25d" - } - }, - "0913a4edc88e46e1bcb78a2eef6dbacd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_902d902b20c449a4aec44d90c66a44d9", - "max": 5033, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5434b0b2eebf4d56a4e1b54875582166", - "value": 5033 - } - }, - "0b3e30198b7e49c581a5f438f23fce59": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0e1f107885b446e484e10d7c2bb3148d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0f26cdb761b74c58b5002d3e592df69a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - 
"flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "130efa006fb44ef5948ee6ee4f66882f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "150aa7a7933b44edb0110f53602fe1a9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "18d8e0b8ddeb43f098437dbe8de71d63": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9b1f8961077d4cb895858f069b901d6b", - "IPY_MODEL_6a624ed9e6ed4e2d88b05578d1f44060", - "IPY_MODEL_97f3c530ddea4d2a92fd24a7bd1f36f4" - ], - "layout": "IPY_MODEL_5b42770f877d40f9bb9054a642de3b02" - } - }, - "194082cd09ec44f2941f3f76134fb96c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1b4b6a31b84641308d77ab04fc85912e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1c1c5e928d4641ac93cec178438487c0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, 
- "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1ced5374042343c48b7523e0db3b05ec": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1d2943840f294a57b1e3636555a4e25d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1f1ac3e94d9a482c863fcca0b3ffd976": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": 
[], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_23e87cd64e544db290cee8a8e491aaa6", - "max": 14232, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_533d34aac5984fcdac3599d217a19269", - "value": 14232 - } - }, - "203df8ab6cc948c69ed8469eac9de357": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8412fa5c43c44b02ae2693e62705385c", - "placeholder": "​", - "style": "IPY_MODEL_9b7068fd51bb463e88e65803dbbbbade", - "value": "Generating test split: 100%" - } - }, - "215f5ed3a7ab450d9bfc87d0e92c306d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "219a467d434248128c8e91cec11b7a16": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "23e87cd64e544db290cee8a8e491aaa6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "25731b7b5adf4f62953468b72d4dc746": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "25b47d1f214340baa662328b090f2b6a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "297762bdcf7a40da868faba7ba524f33": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3243d051548c4996b10c970f582fe9f7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a3d8351283934fdcb6c57470a12df1a1", - "max": 30, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4831180eb2214af081dfc1469f6c4021", - "value": 30 - } - }, - "34209e5556f045d1a8dcd42799cd5b07": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_34eb03a9d5154fc5994a57a432241dbd", - "placeholder": "​", - "style": "IPY_MODEL_70ac8fda9db8482aa23d1412f1026303", - "value": "Downloading data: 100%" - } - }, - "34c747c3411d48a5be4d7b858c7f1897": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "34eb03a9d5154fc5994a57a432241dbd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - 
"min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "35b994f5411444a2a6800072017013df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "366b9176dec742589ed7b423bf51b492": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fc092df95b51432c9836b3e0ba7234f1", - "max": 119, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_130efa006fb44ef5948ee6ee4f66882f", - "value": 119 - } - }, - "367c275138534c858c9313a36e154b10": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - 
"flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "368c173b5ae942aca2136dfe7968e527": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b27a1ac0f80443a18f51002768ef5883", - "IPY_MODEL_ba728a77e57243e4b6a58c09d1d68c19", - "IPY_MODEL_84cf7979444c46409f547fc34fe5959f" - ], - "layout": "IPY_MODEL_25731b7b5adf4f62953468b72d4dc746" - } - }, - "3732ff34dde142e1a934ef62b4f2f4f9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3a0a83c63061434887f2b9760fdf5281": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "3a2ce5219b5349be87932029056fb270": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "3e15aa279b304b3a81176762a144efee": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - 
"right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3fd2f15677bc4cb9adc119c6158d0ec0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "41057edf94144db5933b68ac821ae98a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b9d99330970c43db9a803bad08e87686", - "placeholder": "​", - "style": "IPY_MODEL_35b994f5411444a2a6800072017013df", - "value": " 2/2 [00:00<00:00, 3.23it/s]" - } - }, - "4784d47e7f1140e29e97b9232d95e39d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4831180eb2214af081dfc1469f6c4021": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "488568ad39a54008bdb9a4b6f09f7952": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, 
- "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4c695ba86c6c469ea313a0080dea76bd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_01e3c0d891154f3c8a94c9d2b5dd9bc9", - "max": 2, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_150aa7a7933b44edb0110f53602fe1a9", - "value": 2 - } - }, - "4ded40486ee64bdb874906972b9deec8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_63701b6fe5d34289bb9bb55da3aa8c49", - "placeholder": "​", - "style": "IPY_MODEL_00d55e9d9f08467e81a5b77fdaa35c07", - "value": " 30/30 [00:00<00:00, 610.05 examples/s]" - } - }, - "527332d8360b4df293452d0fcced938d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b503a56aec3a4b09ae141e0af4adac23", - "placeholder": "​", - "style": "IPY_MODEL_7651cd2a101248f59a4216ef50027e90", - "value": " 0/30 [00:00<?, ? examples/s]" - } - }, - "533d34aac5984fcdac3599d217a19269": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5434b0b2eebf4d56a4e1b54875582166": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "59a012d1a99a4aa4b88cc76e83787e56": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_203df8ab6cc948c69ed8469eac9de357", - "IPY_MODEL_3243d051548c4996b10c970f582fe9f7", - "IPY_MODEL_4ded40486ee64bdb874906972b9deec8" - ], - "layout": "IPY_MODEL_ffdadb62b0d74b1ea816cbb1bb924d2e" - } - }, - "5b42770f877d40f9bb9054a642de3b02": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "63701b6fe5d34289bb9bb55da3aa8c49": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "65fe01641cca423facc645cba13f35cd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9b067923d08f496cb2bad20ad7ef4697", - "IPY_MODEL_366b9176dec742589ed7b423bf51b492", - "IPY_MODEL_f7cbcbd2d4594c6bae7c1f9aaf5d9106" - ], - "layout": "IPY_MODEL_f2644660164f44b38a05172b51459ec1" - } - }, - "6a31b14c2db641a8881bca8a33b59c54": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6a624ed9e6ed4e2d88b05578d1f44060": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_488568ad39a54008bdb9a4b6f09f7952", - "max": 2, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0b3e30198b7e49c581a5f438f23fce59", - "value": 2 - } - }, - "6f5f5d31a6f749158440b374c0950f64": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_194082cd09ec44f2941f3f76134fb96c", - "placeholder": "​", - "style": "IPY_MODEL_c65d24e786c34452ad44f04fa87a5131", - "value": " 1.29k/1.29k [00:00<00:00, 44.2kB/s]" - } - }, - "70ac8fda9db8482aa23d1412f1026303": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "70bcb268780e439098031084dff765e2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "75db171714734f49b05273ba2d42be11": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7651cd2a101248f59a4216ef50027e90": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "76ac206691ad4795b168534e69ed5a56": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_eeb298e8c2d74439950325d4e01fc0c2", - "placeholder": "​", - "style": "IPY_MODEL_f148e78f04274ae9a6dca98e4cf6c832", - "value": "Map: 0%" - } - }, - "7cb7ee5d7ee245e0a45520ba3a813f0c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "7dc978907a3b435f98cfe63107929efd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "8412fa5c43c44b02ae2693e62705385c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "84cf7979444c46409f547fc34fe5959f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_eda3a7954d6a49418962197d7b120072", - "placeholder": "​", - "style": "IPY_MODEL_3732ff34dde142e1a934ef62b4f2f4f9", - "value": " 2/2 [00:00<00:00, 106.82it/s]" - } - }, - 
"902d902b20c449a4aec44d90c66a44d9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "93d7c54a35494535a500550d73d22acb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ea8acd336a384c8793651542d6bb5167", - "IPY_MODEL_9867f7c33e24416faf662ec5366af2f1", - "IPY_MODEL_e023fa533e1a4059b8087be03f81ac9a" - ], - "layout": "IPY_MODEL_de682ca9943648579f74c077dec8bcfc" - } - }, - "97f3c530ddea4d2a92fd24a7bd1f36f4": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bac5002e328b47e29605b584e8f3d35c", - "placeholder": "​", - "style": "IPY_MODEL_70bcb268780e439098031084dff765e2", - "value": " 2/2 [00:00<00:00, 34.36it/s]" - } - }, - "9867f7c33e24416faf662ec5366af2f1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9e837cd94af54d01a4ba079f16b881bf", - "max": 6338, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3a0a83c63061434887f2b9760fdf5281", - "value": 6338 - } - }, - "99e5a771b1c94d74bf5c3e70e15ad09f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_acf78f08f05043cdb3273d5c92c0df31", - "IPY_MODEL_f0b531f225234f79b4ead2fad8fe2751", - "IPY_MODEL_6f5f5d31a6f749158440b374c0950f64" - ], - "layout": 
"IPY_MODEL_00b9010d8b794a7594813f6894e5abfd" - } - }, - "9b067923d08f496cb2bad20ad7ef4697": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1c1c5e928d4641ac93cec178438487c0", - "placeholder": "​", - "style": "IPY_MODEL_219a467d434248128c8e91cec11b7a16", - "value": "Generating train split: 100%" - } - }, - "9b1f8961077d4cb895858f069b901d6b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d19247d43efa4f5395479c0091908812", - "placeholder": "​", - "style": "IPY_MODEL_6a31b14c2db641a8881bca8a33b59c54", - "value": "Extracting data files: 100%" - } - }, - "9b7068fd51bb463e88e65803dbbbbade": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9db8cd674fa343c68c4cdea7728bc508": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - 
"model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9de0c934fe3c48169110a5e9c44cf67a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ca226c7464284dd8ac497a021cb2cd79", - "placeholder": "​", - "style": "IPY_MODEL_d8e4d43b5dac4b859fd77ab1c63cee7e", - "value": "Downloading data: 100%" - } - }, - "9e837cd94af54d01a4ba079f16b881bf": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": 
null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a0d5d3e4e2b24345aa002b6fdcd961a1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a309e1df709042d2a3e31df5f3426367": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - 
"bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a3d8351283934fdcb6c57470a12df1a1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"a6739364524849378c94c89a72083871": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9de0c934fe3c48169110a5e9c44cf67a", - "IPY_MODEL_1f1ac3e94d9a482c863fcca0b3ffd976", - "IPY_MODEL_fb746c052b884ddfa5d47ab3e925a790" - ], - "layout": "IPY_MODEL_3e15aa279b304b3a81176762a144efee" - } - }, - "a8491ebf827e4829911cdb6f129dbd9b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4784d47e7f1140e29e97b9232d95e39d", - "placeholder": "​", - "style": "IPY_MODEL_9db8cd674fa343c68c4cdea7728bc508", - "value": " 5.03k/5.03k [00:00<00:00, 21.5kB/s]" - } - }, - "a87aec084388404a904b5e214f60eacc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ac140d48d0984ca59e5e4fc437eff946": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d22c976637264867b789a32857ffc717", - "max": 30, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cf6968f163624bb49e9f2fe5fb557b2d", - "value": 30 - } - }, - "ac97b8345ac449b6aeb13f3815ba614e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_34209e5556f045d1a8dcd42799cd5b07", - "IPY_MODEL_0913a4edc88e46e1bcb78a2eef6dbacd", - "IPY_MODEL_a8491ebf827e4829911cdb6f129dbd9b" - ], - "layout": "IPY_MODEL_297762bdcf7a40da868faba7ba524f33" - } - }, - "acf78f08f05043cdb3273d5c92c0df31": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_367c275138534c858c9313a36e154b10", - "placeholder": "​", - "style": "IPY_MODEL_7cb7ee5d7ee245e0a45520ba3a813f0c", - "value": "Downloading readme: 100%" - } - }, - "b27a1ac0f80443a18f51002768ef5883": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c7c4843952414605b4a3a526ff32ad4f", - "placeholder": "​", - "style": "IPY_MODEL_215f5ed3a7ab450d9bfc87d0e92c306d", - "value": "100%" - } - }, - "b503a56aec3a4b09ae141e0af4adac23": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b9d99330970c43db9a803bad08e87686": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ba728a77e57243e4b6a58c09d1d68c19": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a0d5d3e4e2b24345aa002b6fdcd961a1", - "max": 2, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3a2ce5219b5349be87932029056fb270", - "value": 2 - } - }, - "bac5002e328b47e29605b584e8f3d35c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c65d24e786c34452ad44f04fa87a5131": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c7c4843952414605b4a3a526ff32ad4f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": 
null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ca226c7464284dd8ac497a021cb2cd79": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - 
"width": null - } - }, - "cf6968f163624bb49e9f2fe5fb557b2d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d19247d43efa4f5395479c0091908812": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d22c976637264867b789a32857ffc717": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d831eeae6fce4c1e952d3dddff336c44": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d8e4d43b5dac4b859fd77ab1c63cee7e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "de682ca9943648579f74c077dec8bcfc": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "df53731b5ee5449d905f0a8e9ed0979f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0f26cdb761b74c58b5002d3e592df69a", - "placeholder": "​", - "style": "IPY_MODEL_34c747c3411d48a5be4d7b858c7f1897", - "value": "Downloading data files: 100%" - } - }, - "e023fa533e1a4059b8087be03f81ac9a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a309e1df709042d2a3e31df5f3426367", - "placeholder": "​", - "style": "IPY_MODEL_fe27af60738749bda14625b756362aef", - "value": " 6.34k/6.34k [00:00<00:00, 238kB/s]" - } - }, - "ea8acd336a384c8793651542d6bb5167": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_75db171714734f49b05273ba2d42be11", - "placeholder": "​", - "style": "IPY_MODEL_1ced5374042343c48b7523e0db3b05ec", - "value": "Downloading builder script: 100%" - } - }, - "eda3a7954d6a49418962197d7b120072": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "eeb298e8c2d74439950325d4e01fc0c2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f0b531f225234f79b4ead2fad8fe2751": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": 
"1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7dc978907a3b435f98cfe63107929efd", - "max": 1288, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a87aec084388404a904b5e214f60eacc", - "value": 1288 - } - }, - "f148e78f04274ae9a6dca98e4cf6c832": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f2644660164f44b38a05172b51459ec1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f7cbcbd2d4594c6bae7c1f9aaf5d9106": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1b4b6a31b84641308d77ab04fc85912e", - "placeholder": "​", - "style": "IPY_MODEL_3fd2f15677bc4cb9adc119c6158d0ec0", - "value": " 119/119 [00:00<00:00, 926.36 examples/s]" - } - }, - "fb746c052b884ddfa5d47ab3e925a790": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0e1f107885b446e484e10d7c2bb3148d", - "placeholder": "​", - "style": "IPY_MODEL_d831eeae6fce4c1e952d3dddff336c44", - "value": " 14.2k/14.2k [00:00<00:00, 38.4kB/s]" - } - }, - "fc092df95b51432c9836b3e0ba7234f1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - 
"align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fe27af60738749bda14625b756362aef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ffdadb62b0d74b1ea816cbb1bb924d2e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - 
"grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } + "a87aec084388404a904b5e214f60eacc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ac140d48d0984ca59e5e4fc437eff946": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d22c976637264867b789a32857ffc717", + "max": 30, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cf6968f163624bb49e9f2fe5fb557b2d", + "value": 30 + } + }, + "ac97b8345ac449b6aeb13f3815ba614e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_34209e5556f045d1a8dcd42799cd5b07", + "IPY_MODEL_0913a4edc88e46e1bcb78a2eef6dbacd", + "IPY_MODEL_a8491ebf827e4829911cdb6f129dbd9b" + ], + "layout": "IPY_MODEL_297762bdcf7a40da868faba7ba524f33" + } + }, + "acf78f08f05043cdb3273d5c92c0df31": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_367c275138534c858c9313a36e154b10", + "placeholder": "​", + "style": "IPY_MODEL_7cb7ee5d7ee245e0a45520ba3a813f0c", + "value": "Downloading readme: 100%" + } + }, + "b27a1ac0f80443a18f51002768ef5883": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c7c4843952414605b4a3a526ff32ad4f", + "placeholder": "​", + "style": "IPY_MODEL_215f5ed3a7ab450d9bfc87d0e92c306d", + "value": "100%" + } + }, + "b503a56aec3a4b09ae141e0af4adac23": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b9d99330970c43db9a803bad08e87686": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": 
null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ba728a77e57243e4b6a58c09d1d68c19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a0d5d3e4e2b24345aa002b6fdcd961a1", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3a2ce5219b5349be87932029056fb270", + "value": 2 + } + }, + "bac5002e328b47e29605b584e8f3d35c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c65d24e786c34452ad44f04fa87a5131": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c7c4843952414605b4a3a526ff32ad4f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ca226c7464284dd8ac497a021cb2cd79": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf6968f163624bb49e9f2fe5fb557b2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d19247d43efa4f5395479c0091908812": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d22c976637264867b789a32857ffc717": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d831eeae6fce4c1e952d3dddff336c44": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d8e4d43b5dac4b859fd77ab1c63cee7e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "de682ca9943648579f74c077dec8bcfc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "df53731b5ee5449d905f0a8e9ed0979f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0f26cdb761b74c58b5002d3e592df69a", + "placeholder": "​", + "style": "IPY_MODEL_34c747c3411d48a5be4d7b858c7f1897", + "value": "Downloading data files: 100%" + } + }, + "e023fa533e1a4059b8087be03f81ac9a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a309e1df709042d2a3e31df5f3426367", + "placeholder": "​", + "style": "IPY_MODEL_fe27af60738749bda14625b756362aef", + "value": " 6.34k/6.34k [00:00<00:00, 238kB/s]" + } + }, + "ea8acd336a384c8793651542d6bb5167": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_75db171714734f49b05273ba2d42be11", + "placeholder": "​", + "style": "IPY_MODEL_1ced5374042343c48b7523e0db3b05ec", + "value": "Downloading builder script: 100%" + } + }, + "eda3a7954d6a49418962197d7b120072": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eeb298e8c2d74439950325d4e01fc0c2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f0b531f225234f79b4ead2fad8fe2751": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7dc978907a3b435f98cfe63107929efd", + "max": 1288, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a87aec084388404a904b5e214f60eacc", + "value": 1288 + } + }, + "f148e78f04274ae9a6dca98e4cf6c832": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "description_width": "" + } + }, + "f2644660164f44b38a05172b51459ec1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7cbcbd2d4594c6bae7c1f9aaf5d9106": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b4b6a31b84641308d77ab04fc85912e", + "placeholder": "​", + "style": "IPY_MODEL_3fd2f15677bc4cb9adc119c6158d0ec0", + "value": " 119/119 [00:00<00:00, 926.36 examples/s]" + } + }, + 
"fb746c052b884ddfa5d47ab3e925a790": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0e1f107885b446e484e10d7c2bb3148d", + "placeholder": "​", + "style": "IPY_MODEL_d831eeae6fce4c1e952d3dddff336c44", + "value": " 14.2k/14.2k [00:00<00:00, 38.4kB/s]" + } + }, + "fc092df95b51432c9836b3e0ba7234f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fe27af60738749bda14625b756362aef": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ffdadb62b0d74b1ea816cbb1bb924d2e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials/notebooks/training-textclassification-classyclassification-activelearning.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-classyclassification-activelearning.ipynb index 06aca04ff7..6f308e9d02 
100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-classyclassification-activelearning.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-classyclassification-activelearning.ipynb @@ -111,11 +111,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -139,7 +135,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -182,9 +178,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -257,11 +256,11 @@ "# Get some annotated examples per class\n", "n_samples_per_class = 5\n", "data = {\"World\": [], \"Sports\": [], \"Sci/Tech\": [], \"Business\": []}\n", - "while not all([len(value)== n_samples_per_class for key,value in data.items()]):\n", + "while not all([len(value) == n_samples_per_class for key, value in data.items()]):\n", " for idx, rec in enumerate(train_rg):\n", " if len(data[rec.annotation]) < n_samples_per_class:\n", " data[rec.annotation].append(rec.text)\n", - " \n", + "\n", "# Train a few-shot classifier\n", "classifier = ClassyClassifier(data=data, model=\"all-MiniLM-L6-v2\")\n", "classifier(\"This texts is about games, goals, matches and sports.\")" @@ -317,8 +316,9 @@ "settings = rg.TextClassificationSettings(label_schema=list(data.keys()))\n", "rg.configure_dataset_settings(name=DATASET_NAME, settings=settings)\n", "\n", + "\n", "# Evaluate and update records\n", - "def evaluate_records(records, idx = 0):\n", + "def evaluate_records(records, idx=0):\n", " texts = [rec.text for rec in records]\n", " predictions = [list(pred.items()) for pred in classifier.pipe(texts)]\n", " for pred, rec in zip(predictions, records):\n", @@ -330,6 +330,7 @@ " rec.metadata = {\"idx\": idx}\n", " return records\n", "\n", + "\n", "# Log initial predictions\n", "ds_slice = evaluate_records(ds[:NUM_SAMPLES_PER_LOOP])\n", "rg.log(ds[:NUM_SAMPLES_PER_LOOP], DATASET_NAME)" @@ -373,7 +374,7 @@ ")\n", "def active_learning_loop(records, ctx):\n", " idx = ctx.query_params[\"idx\"]\n", - " new_idx = idx+NUM_SAMPLES_PER_LOOP\n", + " new_idx = idx + NUM_SAMPLES_PER_LOOP\n", " print(\"1. train a few-shot classifier with validated data\")\n", " for rec in records:\n", " if rec.status == \"Validated\":\n", @@ -381,11 +382,11 @@ " classifier.set_training_data(loop_data)\n", "\n", " print(\"2. 
get new record predictions\")\n", - " ds_slice = ds[new_idx: new_idx+NUM_SAMPLES_PER_LOOP]\n", + " ds_slice = ds[new_idx : new_idx + NUM_SAMPLES_PER_LOOP]\n", " records_to_update = evaluate_records(ds_slice, new_idx)\n", " texts = [rec.text for rec in ds_slice]\n", " predictions = [list(pred.items()) for pred in classifier.pipe(texts)]\n", - " \n", + "\n", " print(\"3. update query params\")\n", " ctx.query_params[\"idx\"] = new_idx\n", "\n", diff --git a/docs/_source/tutorials/notebooks/training-textclassification-modal-activelearning.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-modal-activelearning.ipynb index a52a6cacc6..5a7dd07d3d 100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-modal-activelearning.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-modal-activelearning.ipynb @@ -115,15 +115,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -132,6 +129,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -143,7 +141,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# 
api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -176,11 +174,12 @@ "import numpy as np\n", "from urllib import request\n", "from pathlib import Path\n", - "from tempfile import TemporaryDirectory\n" + "from tempfile import TemporaryDirectory" ] }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -191,14 +190,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -222,9 +225,8 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "train_df = pd.read_csv(\"data/active_learning/train.csv\")\n", - "test_df = pd.read_csv(\"data/active_learning/test.csv\")\n" + "test_df = pd.read_csv(\"data/active_learning/test.csv\")" ] }, { @@ -416,7 +418,7 @@ } ], "source": [ - "test_df\n" + "test_df" ] }, { @@ -452,9 +454,8 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Define our classification model\n", - "classifier = MultinomialNB()\n" + "classifier = MultinomialNB()" ] }, { @@ -472,11 +473,10 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Define active learner\n", "learner = ActiveLearner(\n", " estimator=classifier,\n", - ")\n" + ")" ] }, { @@ -496,12 +496,11 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# The resulting matrices will have the shape of (`nr of examples`, `nr of word n-grams`)\n", "vectorizer = CountVectorizer(ngram_range=(1, 5))\n", "\n", "X_train = vectorizer.fit_transform(train_df.CONTENT)\n", - "X_test = vectorizer.transform(test_df.CONTENT)\n" + "X_test = vectorizer.transform(test_df.CONTENT)" ] }, { @@ -540,7 +539,7 @@ "n_instances = 10\n", "\n", "# Accuracies after each iteration to keep track of our improvement\n", - "accuracies = []\n" + "accuracies = []" ] }, { @@ -561,7 +560,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Query examples from our training pool with the most uncertain prediction\n", "query_idx, query_inst = learner.query(X_train, n_instances=n_instances)\n", "\n", @@ -570,7 +568,7 @@ " probabilities = learner.predict_proba(X_train[query_idx])\n", "# For the very first query we do not have any predictions\n", "except NotFittedError:\n", - " probabilities = [[0.5, 0.5]] * n_instances\n" + " probabilities = [[0.5, 0.5]] * n_instances" ] }, { @@ -580,7 +578,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Build the Argilla records\n", "records = [\n", " rg.TextClassificationRecord(\n", @@ -593,7 +590,7 @@ 
"]\n", "\n", "# Log the records\n", - "rg.log(records, name=\"active_learning_tutorial\")\n" + "rg.log(records, name=\"active_learning_tutorial\")" ] }, { @@ -626,7 +623,7 @@ "for i, record in enumerate(records):\n", " record.annotation = \"HAM\" if i % 2 == 0 else \"SPAM\"\n", "rg.log(records, name=\"active_learning_tutorial\")\n", - "###\n" + "###" ] }, { @@ -661,7 +658,7 @@ "learner.teach(X=X_train[query_idx], y=y_train.to_list())\n", "\n", "# Keep track of our improvement\n", - "accuracies.append(learner.score(X=X_test, y=test_df.CLASS))\n" + "accuracies.append(learner.score(X=X_test, y=test_df.CLASS))" ] }, { @@ -703,11 +700,10 @@ } ], "source": [ - "\n", "# Plot the accuracy versus the iteration number\n", "plt.plot(accuracies)\n", "plt.xlabel(\"Number of iterations\")\n", - "plt.ylabel(\"Accuracy\")\n" + "plt.ylabel(\"Accuracy\")" ] }, { @@ -740,7 +736,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "n_iterations = 150\n", "n_instances = 10\n", "random_samples = 50\n", @@ -778,7 +773,7 @@ " accuracies.append(learner.score(X=X_test, y=test_df.CLASS))\n", " accuracies_rnd.append(accuracies)\n", "\n", - "arr_max, arr_rnd = np.array(accuracies_max), np.array(accuracies_rnd)\n" + "arr_max, arr_rnd = np.array(accuracies_max), np.array(accuracies_rnd)" ] }, { @@ -830,7 +825,7 @@ "plt.title(\"Sampling strategies: Max uncertainty vs random\")\n", "plt.xlabel(\"Number of annotation iterations\")\n", "plt.ylabel(\"Accuracy\")\n", - "plt.legend([\"max uncertainty\", \"random sampling\"], loc=4)\n" + "plt.legend([\"max uncertainty\", \"random sampling\"], loc=4)" ] }, { @@ -848,20 +843,21 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", "def load_data() -> pd.DataFrame:\n", " \"\"\"\n", " Downloads the [YouTube Spam Collection](http://www.dt.fee.unicamp.br/~tiago//youtubespamcollection/)\n", " and returns the data as a tuple with a train and test DataFrame.\n", " \"\"\"\n", - " links, data_df = [\n", - " 
\"http://lasid.sor.ufscar.br/labeling/datasets/9/download/\",\n", - " \"http://lasid.sor.ufscar.br/labeling/datasets/10/download/\",\n", - " \"http://lasid.sor.ufscar.br/labeling/datasets/11/download/\",\n", - " \"http://lasid.sor.ufscar.br/labeling/datasets/12/download/\",\n", - " \"http://lasid.sor.ufscar.br/labeling/datasets/13/download/\",\n", - " ], None\n", + " links, data_df = (\n", + " [\n", + " \"http://lasid.sor.ufscar.br/labeling/datasets/9/download/\",\n", + " \"http://lasid.sor.ufscar.br/labeling/datasets/10/download/\",\n", + " \"http://lasid.sor.ufscar.br/labeling/datasets/11/download/\",\n", + " \"http://lasid.sor.ufscar.br/labeling/datasets/12/download/\",\n", + " \"http://lasid.sor.ufscar.br/labeling/datasets/13/download/\",\n", + " ],\n", + " None,\n", + " )\n", "\n", " with TemporaryDirectory() as tmpdirname:\n", " dfs = []\n", @@ -880,7 +876,7 @@ "\n", "train_df, test_df = load_data()\n", "train_df.to_csv(\"data/active_learning/train.csv\", index=False)\n", - "test_df.to_csv(\"data/active_learning/test.csv\", index=False)\n" + "test_df.to_csv(\"data/active_learning/test.csv\", index=False)" ] } ], diff --git a/docs/_source/tutorials/notebooks/training-textclassification-setfit-fewshot.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-setfit-fewshot.ipynb index f1aa0c8f3f..3e25439ac6 100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-setfit-fewshot.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-setfit-fewshot.ipynb @@ -115,15 +115,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": 
"7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -132,6 +129,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -143,7 +141,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -174,6 +172,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -184,14 +183,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -217,7 +220,7 @@ "\n", "unlabelled = rg.DatasetForTextClassification.from_datasets(unlabelled)\n", "\n", - "rg.log(unlabelled, \"imdb_unlabelled\")\n" + "rg.log(unlabelled, \"imdb_unlabelled\")" ] }, { diff --git a/docs/_source/tutorials/notebooks/training-textclassification-setfit-sentiment.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-setfit-sentiment.ipynb index 191e59ca44..7aa104c6ea 100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-setfit-sentiment.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-setfit-sentiment.ipynb @@ -110,15 +110,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -127,6 +124,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +136,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -170,6 +168,7 @@ }, { "cell_type": "markdown", + "id": 
"9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -180,14 +179,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { diff --git a/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb index 3d3eab42ba..30752d4245 100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb @@ -131,15 +131,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -148,6 +145,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -159,7 +157,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace 
workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -168,6 +166,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -178,14 +177,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -219,7 +222,7 @@ "source": [ "import datasets\n", "\n", - "trec = datasets.load_dataset(\"trec\", version=datasets.Version(\"2.0.0\"))\n" + "trec = datasets.load_dataset(\"trec\", version=datasets.Version(\"2.0.0\"))" ] }, { @@ -258,7 +261,7 @@ " transformer_model = \"bert-base-uncased\"\n", "\n", "# Init tokenizer\n", - "tokenizer = AutoTokenizer.from_pretrained(transformer_model)\n" + "tokenizer = AutoTokenizer.from_pretrained(transformer_model)" ] }, { @@ -290,7 +293,7 @@ "# Create the dataset for small-text\n", "dataset = TransformersDataset.from_arrays(\n", " train_text, train_labels, tokenizer, target_labels=target_labels\n", - ")\n" + ")" ] }, { @@ -314,7 +317,7 @@ "\n", "dataset_test = TransformersDataset.from_arrays(\n", " test_text, test_labels, tokenizer, target_labels=np.arange(num_classes)\n", - ")\n" + ")" ] }, { @@ -363,7 +366,7 @@ "clf_factory = TransformerBasedClassificationFactory(\n", " TransformerModelArguments(transformer_model),\n", " num_classes=6,\n", - " kwargs={\"verbosity\": 100}\n", + " kwargs={\"verbosity\": 100},\n", " # If you have a cuda device, specify it here.\n", " # Otherwise, just remove the following line.\n", " # kwargs={\"device\": \"cuda\"}\n", @@ -373,7 +376,7 @@ "query_strategy = BreakingTies()\n", "\n", "# Use the active learner with a pool containing all unlabeled data\n", - "active_learner = PoolBasedActiveLearner(clf_factory, query_strategy, dataset)\n" + "active_learner = PoolBasedActiveLearner(clf_factory, query_strategy, dataset)" ] }, { @@ -403,7 +406,7 @@ "NUM_SAMPLES = 20\n", "\n", "# Randomly draw an initial subset from the data pool\n", - "initial_indices = random_initialization(dataset, NUM_SAMPLES)\n" + "initial_indices = random_initialization(dataset, NUM_SAMPLES)" ] }, { @@ -455,7 +458,7 @@ "]\n", "\n", "# Log the initial records to Argilla\n", - "rg.log(records, DATASET_NAME)\n" + "rg.log(records, DATASET_NAME)" ] }, { @@ -484,6 +487,7 @@ "LABEL2INT = 
trec[\"train\"].features[\"coarse_label\"].str2int\n", "ACCURACIES = []\n", "\n", + "\n", "# Set up the active learning loop with the listener decorator\n", "@listener(\n", " dataset=DATASET_NAME,\n", @@ -537,7 +541,7 @@ " ctx.query_params[\"batch_id\"] = new_batch\n", " print(\"Done!\")\n", "\n", - " print(\"Waiting for annotations ...\")\n" + " print(\"Waiting for annotations ...\")" ] }, { @@ -563,7 +567,7 @@ "metadata": {}, "outputs": [], "source": [ - "active_learning_loop.start()\n" + "active_learning_loop.start()" ] }, { @@ -606,7 +610,7 @@ "source": [ "import pandas as pd\n", "\n", - "pd.Series(ACCURACIES).plot(xlabel=\"Iteration\", ylabel=\"Accuracy\")\n" + "pd.Series(ACCURACIES).plot(xlabel=\"Iteration\", ylabel=\"Accuracy\")" ] }, { @@ -625,7 +629,7 @@ "metadata": {}, "outputs": [], "source": [ - "active_learning_loop.stop()\n" + "active_learning_loop.stop()" ] }, { diff --git a/docs/_source/tutorials/notebooks/training-textclassification-transformers-pretrained.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-transformers-pretrained.ipynb index 84590d7082..de51f12138 100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-transformers-pretrained.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-transformers-pretrained.ipynb @@ -118,15 +118,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ 
-135,6 +132,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -146,7 +144,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -181,6 +179,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -191,14 +190,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -338,6 +341,7 @@ "def predict(examples):\n", " return {\"predictions\": sentiment_classifier(examples[\"text\"], truncation=True)}\n", "\n", + "\n", "# Add .select(range(10)) before map if you just want to test this quickly with 10 examples\n", "to_label1 = to_label1.map(predict, batched=True, batch_size=4)" ] @@ -385,7 +389,7 @@ " prediction_agent=\"distilbert-base-uncased-finetuned-sst-2-english\",\n", " )\n", " records.append(record)\n", - " \n", + "\n", "rg.log(name=\"labeling_with_pretrained\", records=records)" ] }, @@ -603,9 +607,11 @@ " \"distilbert-base-uncased-finetuned-sst-2-english\"\n", ")\n", "\n", + "\n", "def tokenize_function(examples):\n", " return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n", "\n", + "\n", "tokenized_train_ds = train_ds.map(tokenize_function, batched=True)\n", "\n", "# Split the data into a training and evaluation set\n", @@ -646,11 +652,13 @@ "\n", "metric = load_metric(\"accuracy\")\n", "\n", + "\n", "def compute_metrics(eval_pred):\n", " logits, labels = eval_pred\n", " predictions = np.argmax(logits, axis=-1)\n", " return metric.compute(predictions=predictions, references=labels)\n", "\n", + "\n", "trainer = Trainer(\n", " args=training_args,\n", " model=model,\n", @@ -720,9 +728,12 @@ "metadata": {}, "outputs": [], "source": [ - "finetuned_sentiment_classifier(\n", - " \"I need to deposit my virtual card, how do i do that.\"\n", - "), sentiment_classifier(\"I need to deposit my virtual card, how do i do that.\")" + "(\n", + " finetuned_sentiment_classifier(\n", + " \"I need to deposit my virtual card, how do i do that.\"\n", + " ),\n", + " sentiment_classifier(\"I need to deposit my virtual card, how do i do that.\"),\n", + ")" ] }, { @@ -742,9 +753,10 @@ "metadata": {}, "outputs": [], "source": [ - "finetuned_sentiment_classifier(\n", - " \"Why is my payment still pending?\"\n", - "), sentiment_classifier(\"Why is my payment still pending?\")" + "(\n", 
+ " finetuned_sentiment_classifier(\"Why is my payment still pending?\"),\n", + " sentiment_classifier(\"Why is my payment still pending?\"),\n", + ")" ] }, { @@ -770,6 +782,7 @@ "source": [ "rb_dataset = rg.load(name=\"labeling_with_pretrained\", query=\"status:Default\")\n", "\n", + "\n", "def predict(examples):\n", " texts = [example[\"text\"] for example in examples[\"inputs\"]]\n", " return {\n", @@ -778,6 +791,7 @@ " * len(texts),\n", " }\n", "\n", + "\n", "ds_dataset = rb_dataset.to_datasets().map(predict, batched=True, batch_size=8)\n", "\n", "records = rg.read_datasets(ds_dataset, task=\"TextClassification\")\n", diff --git a/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb b/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb index 947307ef31..020ece0da7 100644 --- a/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb +++ b/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb @@ -116,15 +116,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -133,6 +130,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -144,7 +142,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# 
api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -183,12 +181,14 @@ "from unstructured.staging.argilla import stage_for_argilla\n", "\n", "import nltk\n", - "nltk.download('averaged_perceptron_tagger')\n", - "nltk.download('punkt')" + "\n", + "nltk.download(\"averaged_perceptron_tagger\")\n", + "nltk.download(\"punkt\")" ] }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -199,14 +199,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -234,6 +238,7 @@ "source": [ "ISW_BASE_URL = \"https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment\"\n", "\n", + "\n", "def datetime_to_url(dt):\n", " month = dt.strftime(\"%B\").lower()\n", " return f\"{ISW_BASE_URL}-{month}-{dt.day}\"" @@ -274,8 +279,8 @@ " r = requests.get(url)\n", " if r.status_code != 200:\n", " return None\n", - " \n", - " elements = partition_html(text=r.text) \n", + "\n", + " elements = partition_html(text=r.text)\n", " return elements" ] }, @@ -302,13 +307,14 @@ " if element.text == \"Key Takeaways\":\n", " return idx\n", "\n", + "\n", "def get_key_takeaways(elements):\n", " key_takeaways_idx = _find_key_takeaways_idx(elements)\n", " if not key_takeaways_idx:\n", " return None\n", - " \n", + "\n", " takeaways = []\n", - " for element in elements[key_takeaways_idx + 1:]:\n", + " for element in elements[key_takeaways_idx + 1 :]:\n", " if not isinstance(element, ListItem):\n", " break\n", " takeaways.append(element)\n", @@ -362,12 +368,12 @@ "source": [ "def get_narrative(elements):\n", " narrative_text = \"\"\n", - " for element in elements: \n", + " for element in elements:\n", " if isinstance(element, NarrativeText) and len(element.text) > 500:\n", " # NOTE: Removes citations like [3] from the text\n", " element_text = re.sub(\"\\[\\d{1,3}\\]\", \"\", element.text)\n", " narrative_text += f\"\\n\\n{element_text}\"\n", - " \n", + "\n", " return NarrativeText(text=narrative_text.strip())" ] }, @@ -415,10 +421,10 @@ " elements = url_to_elements(url)\n", " if url is None or not elements:\n", " continue\n", - " \n", + "\n", " text = get_narrative(elements)\n", " annotation = get_key_takeaways(elements)\n", - " \n", + "\n", " if text and annotation:\n", " inputs.append(text)\n", " annotations.append(annotation.text)\n", @@ -677,7 +683,7 @@ "outputs": [], "source": [ "from transformers import AutoTokenizer\n", - " \n", + "\n", "tokenizer = 
AutoTokenizer.from_pretrained(model_checkpoint)" ] }, @@ -691,13 +697,16 @@ "max_input_length = 1024\n", "max_target_length = 128\n", "\n", + "\n", "def preprocess_function(examples):\n", " inputs = [doc for doc in examples[\"text\"]]\n", " model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)\n", "\n", " # Set up the tokenizer for targets\n", " with tokenizer.as_target_tokenizer():\n", - " labels = tokenizer(examples[\"annotation\"], max_length=max_target_length, truncation=True)\n", + " labels = tokenizer(\n", + " examples[\"annotation\"], max_length=max_target_length, truncation=True\n", + " )\n", "\n", " model_inputs[\"labels\"] = labels[\"input_ids\"]\n", " return model_inputs" @@ -720,7 +729,12 @@ "metadata": {}, "outputs": [], "source": [ - "from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer\n", + "from transformers import (\n", + " AutoModelForSeq2SeqLM,\n", + " DataCollatorForSeq2Seq,\n", + " Seq2SeqTrainingArguments,\n", + " Seq2SeqTrainer,\n", + ")\n", "\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)" ] @@ -736,7 +750,7 @@ "model_name = model_checkpoint.split(\"/\")[-1]\n", "args = Seq2SeqTrainingArguments(\n", " \"t5-small-isw-summaries\",\n", - " evaluation_strategy = \"epoch\",\n", + " evaluation_strategy=\"epoch\",\n", " learning_rate=2e-5,\n", " per_device_train_batch_size=batch_size,\n", " per_device_eval_batch_size=batch_size,\n", @@ -804,8 +818,8 @@ "outputs": [], "source": [ "summarization_model = pipeline(\n", - "task=\"summarization\",\n", - "model=\"./t5-small-isw-summaries\",\n", + " task=\"summarization\",\n", + " model=\"./t5-small-isw-summaries\",\n", ")" ] }, diff --git a/docs/_source/tutorials_and_integrations/integrations/add_sentence_transformers_embeddings_as_vectors.ipynb b/docs/_source/tutorials_and_integrations/integrations/add_sentence_transformers_embeddings_as_vectors.ipynb index 080a7795d1..30a8c6aed7 100644 --- 
a/docs/_source/tutorials_and_integrations/integrations/add_sentence_transformers_embeddings_as_vectors.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/add_sentence_transformers_embeddings_as_vectors.ipynb @@ -92,7 +92,9 @@ "outputs": [], "source": [ "import argilla as rg\n", - "from argilla.client.feedback.integrations.sentencetransformers import SentenceTransformersExtractor" + "from argilla.client.feedback.integrations.sentencetransformers import (\n", + " SentenceTransformersExtractor,\n", + ")" ] }, { @@ -111,11 +113,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"argilla\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"argilla\")" ] }, { @@ -138,7 +136,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -161,9 +159,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -244,8 +245,8 @@ "source": [ "# Initialize the SentenceTransformersExtractor\n", "ste = SentenceTransformersExtractor(\n", - " model = \"TaylorAI/bge-micro-v2\", # Use a model from https://huggingface.co/models?library=sentence-transformers\n", - " show_progress = False,\n", + " model=\"TaylorAI/bge-micro-v2\", # Use a model from https://huggingface.co/models?library=sentence-transformers\n", + " show_progress=False,\n", ")" ] }, @@ -271,8 +272,8 @@ "# Update the records\n", "updated_records = ste.update_records(\n", " records=ds_remote.records,\n", - " fields=None, # Use all fields\n", - " overwrite=True, # Overwrite existing fields\n", + " fields=None, # Use all fields\n", + " overwrite=True, # Overwrite existing fields\n", ")" ] }, @@ -321,10 +322,10 @@ "source": [ "# Update the dataset\n", "ste.update_dataset(\n", - " dataset=ds_remote, \n", - " fields=[\"context\"], # Only update the context field\n", - " update_records=True, # Update the records in the dataset\n", - " overwrite=False, # Overwrite existing fields\n", + " dataset=ds_remote,\n", + " fields=[\"context\"], # Only update the context field\n", + " update_records=True, # Update the records in the dataset\n", + " overwrite=False, # Overwrite existing fields\n", ")" ] }, diff --git a/docs/_source/tutorials_and_integrations/integrations/add_text_descriptives_as_metadata.ipynb b/docs/_source/tutorials_and_integrations/integrations/add_text_descriptives_as_metadata.ipynb index 40590dcee3..f434dccfeb 100644 --- a/docs/_source/tutorials_and_integrations/integrations/add_text_descriptives_as_metadata.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/add_text_descriptives_as_metadata.ipynb @@ -99,7 +99,9 @@ "outputs": [], "source": [ "import argilla as rg\n", - "from argilla.client.feedback.integrations.textdescriptives import TextDescriptivesExtractor\n", + "from argilla.client.feedback.integrations.textdescriptives import (\n", + " 
TextDescriptivesExtractor,\n", + ")\n", "\n", "from datasets import load_dataset" ] @@ -120,11 +122,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -147,7 +145,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -170,9 +168,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -334,10 +335,10 @@ "source": [ "# Initialize the TextDescriptivesExtractor\n", "tde = TextDescriptivesExtractor(\n", - " model = \"en\",\n", - " metrics = None,\n", - " visible_for_annotators = False,\n", - " show_progress = True,\n", + " model=\"en\",\n", + " metrics=None,\n", + " visible_for_annotators=False,\n", + " show_progress=True,\n", ")" ] }, @@ -432,10 +433,10 @@ "source": [ "# Initialize the TextDescriptivesExtractor\n", "tde = TextDescriptivesExtractor(\n", - " model = \"en\",\n", - " metrics = [\"descriptive_stats\", \"readability\"],\n", - " visible_for_annotators = True,\n", - " show_progress = True,\n", + " model=\"en\",\n", + " metrics=[\"descriptive_stats\", \"readability\"],\n", + " visible_for_annotators=True,\n", + " show_progress=True,\n", ")" ] }, diff --git a/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb b/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb index c05673983d..3a610618d4 100644 --- a/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb @@ -98,16 +98,17 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader, set_global_handler\n", + "from llama_index.core import (\n", + " VectorStoreIndex,\n", + " ServiceContext,\n", + " SimpleDirectoryReader,\n", + " set_global_handler,\n", + ")\n", "from llama_index.llms.openai import OpenAI\n", "\n", "import argilla as rg\n", "\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\",\n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/integrations/monitor_endpoints with_fastapi.ipynb b/docs/_source/tutorials_and_integrations/integrations/monitor_endpoints 
with_fastapi.ipynb index 025229b556..fdb997be76 100644 --- a/docs/_source/tutorials_and_integrations/integrations/monitor_endpoints with_fastapi.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/monitor_endpoints with_fastapi.ipynb @@ -128,10 +128,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -159,7 +156,7 @@ "\n", "# for adding logging to API endpoints\n", "from argilla.monitoring.asgi import (\n", - " ArgillaLogHTTPMiddleware, \n", + " ArgillaLogHTTPMiddleware,\n", " text_classification_mapper,\n", " token_classification_mapper,\n", ")\n", @@ -384,11 +381,12 @@ " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/transformers/\", # the endpoint that will be logged\n", " dataset=\"monitoring_transformers\", # your dataset name\n", - " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", + " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", ")" ] }, @@ -416,6 +414,7 @@ " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/spacy/\",\n", @@ -423,6 +422,7 @@ " records_mapper=token2records,\n", ")\n", "\n", + "\n", "# prediction endpoint using spacy pipeline\n", "@app.post(\"/ner/\")\n", "def predict_spacy(batch: List[str]):\n", @@ -535,8 +535,8 @@ "import requests\n", "\n", "response = requests.post(\n", - " \"http://localhost:8000/sentiment/\", \n", - " json=[\"I like Argilla\", \"I hated data labelling but now I don't\"]\n", + " 
\"http://localhost:8000/sentiment/\",\n", + " json=[\"I like Argilla\", \"I hated data labelling but now I don't\"],\n", ")\n", "\n", "response.content" @@ -583,7 +583,7 @@ "from transformers import pipeline\n", "\n", "from argilla.monitoring.asgi import (\n", - " ArgillaLogHTTPMiddleware, \n", + " ArgillaLogHTTPMiddleware,\n", " text_classification_mapper,\n", " token_classification_mapper,\n", ")\n", @@ -593,6 +593,7 @@ "\n", "app = FastAPI()\n", "\n", + "\n", "# prediction endpoint using transformers pipeline\n", "@app.post(\"/sentiment/\")\n", "def predict_transformers(batch: List[str]):\n", @@ -605,25 +606,29 @@ " for prediction in predictions\n", " ]\n", "\n", + "\n", "def text2records(batch: List[str], outputs: List[dict]):\n", " return [\n", " text_classification_mapper(data, prediction)\n", " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/transformers/\", # the endpoint that will be logged\n", " dataset=\"monitoring_transformers\", # your dataset name\n", - " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", + " records_mapper=text2records, # your post-process func to adapt service inputs and outputs into an Argilla record\n", ")\n", "\n", + "\n", "def token2records(batch: List[str], outputs: List[dict]):\n", " return [\n", " token_classification_mapper(data, prediction)\n", " for data, prediction in zip(batch, outputs)\n", " ]\n", "\n", + "\n", "# prediction endpoint using spacy pipeline\n", "@app.post(\"/ner/\")\n", "def predict_spacy(batch: List[str]):\n", @@ -643,6 +648,7 @@ " predictions.append(prediction)\n", " return predictions\n", "\n", + "\n", "app.add_middleware(\n", " ArgillaLogHTTPMiddleware,\n", " api_endpoint=\"/ner/\",\n", @@ -657,14 +663,13 @@ " records_mapper=text2records,\n", ")\n", "\n", + "\n", "@app.get(\"/\")\n", "def root():\n", " return {\"message\": \"alive\"}\n", 
"\n", - "argilla.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "\n", + "argilla.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] } ], diff --git a/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb b/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb index 01af30ac5f..1a83d5b03f 100644 --- a/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb @@ -113,14 +113,12 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -129,6 +127,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -139,7 +138,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -177,7 +176,8 @@ "from unstructured.staging.argilla import stage_for_argilla\n", "\n", "import nltk\n", - "nltk.download('averaged_perceptron_tagger')" + "\n", + 
"nltk.download(\"averaged_perceptron_tagger\")" ] }, { @@ -205,6 +205,7 @@ "source": [ "ISW_BASE_URL = \"https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment\"\n", "\n", + "\n", "def datetime_to_url(dt):\n", " month = dt.strftime(\"%B\").lower()\n", " return f\"{ISW_BASE_URL}-{month}-{dt.day}\"" @@ -245,8 +246,8 @@ " r = requests.get(url)\n", " if r.status_code != 200:\n", " return None\n", - " \n", - " elements = partition_html(text=r.text) \n", + "\n", + " elements = partition_html(text=r.text)\n", " return elements" ] }, @@ -273,13 +274,14 @@ " if element.text == \"Key Takeaways\":\n", " return idx\n", "\n", + "\n", "def get_key_takeaways(elements):\n", " key_takeaways_idx = _find_key_takeaways_idx(elements)\n", " if not key_takeaways_idx:\n", " return None\n", - " \n", + "\n", " takeaways = []\n", - " for element in elements[key_takeaways_idx + 1:]:\n", + " for element in elements[key_takeaways_idx + 1 :]:\n", " if not isinstance(element, ListItem):\n", " break\n", " takeaways.append(element)\n", @@ -333,12 +335,12 @@ "source": [ "def get_narrative(elements):\n", " narrative_text = \"\"\n", - " for element in elements: \n", + " for element in elements:\n", " if isinstance(element, NarrativeText) and len(element.text) > 500:\n", " # NOTE: Removes citations like [3] from the text\n", " element_text = re.sub(\"\\[\\d{1,3}\\]\", \"\", element.text)\n", " narrative_text += f\"\\n\\n{element_text}\"\n", - " \n", + "\n", " return NarrativeText(text=narrative_text.strip())" ] }, @@ -386,10 +388,10 @@ " elements = url_to_elements(url)\n", " if url is None or not elements:\n", " continue\n", - " \n", + "\n", " text = get_narrative(elements)\n", " annotation = get_key_takeaways(elements)\n", - " \n", + "\n", " if text and annotation:\n", " inputs.append(text)\n", " annotations.append(annotation.text)\n", @@ -648,7 +650,7 @@ "outputs": [], "source": [ "from transformers import AutoTokenizer\n", - " \n", + "\n", "tokenizer = 
AutoTokenizer.from_pretrained(model_checkpoint)" ] }, @@ -662,13 +664,16 @@ "max_input_length = 1024\n", "max_target_length = 128\n", "\n", + "\n", "def preprocess_function(examples):\n", " inputs = [doc for doc in examples[\"text\"]]\n", " model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)\n", "\n", " # Setup the tokenizer for targets\n", " with tokenizer.as_target_tokenizer():\n", - " labels = tokenizer(examples[\"annotation\"], max_length=max_target_length, truncation=True)\n", + " labels = tokenizer(\n", + " examples[\"annotation\"], max_length=max_target_length, truncation=True\n", + " )\n", "\n", " model_inputs[\"labels\"] = labels[\"input_ids\"]\n", " return model_inputs" @@ -691,7 +696,12 @@ "metadata": {}, "outputs": [], "source": [ - "from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer\n", + "from transformers import (\n", + " AutoModelForSeq2SeqLM,\n", + " DataCollatorForSeq2Seq,\n", + " Seq2SeqTrainingArguments,\n", + " Seq2SeqTrainer,\n", + ")\n", "\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)" ] @@ -707,7 +717,7 @@ "model_name = model_checkpoint.split(\"/\")[-1]\n", "args = Seq2SeqTrainingArguments(\n", " \"t5-small-isw-summaries\",\n", - " evaluation_strategy = \"epoch\",\n", + " evaluation_strategy=\"epoch\",\n", " learning_rate=2e-5,\n", " per_device_train_batch_size=batch_size,\n", " per_device_eval_batch_size=batch_size,\n", @@ -775,8 +785,8 @@ "outputs": [], "source": [ "summarization_model = pipeline(\n", - "task=\"summarization\",\n", - "model=\"./t5-small-isw-summaries\",\n", + " task=\"summarization\",\n", + " model=\"./t5-small-isw-summaries\",\n", ")" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/curating-feedback-instructiondataset.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/curating-feedback-instructiondataset.ipynb index e18007b4a7..efc72077fb 100644 --- 
a/docs/_source/tutorials_and_integrations/tutorials/feedback/curating-feedback-instructiondataset.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/curating-feedback-instructiondataset.ipynb @@ -90,10 +90,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\")" ] }, { @@ -116,7 +113,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -139,9 +136,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -432,14 +432,25 @@ "outputs": [], "source": [ "# format the data as Argilla records\n", - "records = [rg.FeedbackRecord(fields={\"category\": record[\"category\"], \"instruction\": record[\"instruction\"], \"response\": record[\"response\"], \"context\": record[\"context\"]}, external_id=record['id']) for record in data]\n", + "records = [\n", + " rg.FeedbackRecord(\n", + " fields={\n", + " \"category\": record[\"category\"],\n", + " \"instruction\": record[\"instruction\"],\n", + " \"response\": record[\"response\"],\n", + " \"context\": record[\"context\"],\n", + " },\n", + " external_id=record[\"id\"],\n", + " )\n", + " for record in data\n", + "]\n", "\n", "# list of fields that we will use later for our dataset settings\n", "fields = [\n", " rg.TextField(name=\"category\", title=\"Task category\"),\n", " rg.TextField(name=\"instruction\"),\n", " rg.TextField(name=\"context\", title=\"Input\", required=False),\n", - " rg.TextField(name=\"response\")\n", + " rg.TextField(name=\"response\"),\n", "]" ] }, @@ -458,25 +469,25 @@ "outputs": [], "source": [ "# list of questions to display in the feedback form\n", - "questions =[\n", + "questions = [\n", " rg.TextQuestion(\n", " name=\"new-instruction\",\n", " title=\"Final instruction:\",\n", " description=\"Write the final version of the instruction, making sure that it matches the task category. If the original instruction is ok, copy and paste it here.\",\n", - " required=True\n", + " required=True,\n", " ),\n", " rg.TextQuestion(\n", " name=\"new-input\",\n", " title=\"Final input:\",\n", " description=\"Write the final version of the input, making sure that it makes sense with the task category. If the original input is ok, copy and paste it here. 
If an input is not needed, leave this empty.\",\n", - " required=False\n", + " required=False,\n", " ),\n", " rg.TextQuestion(\n", " name=\"new-response\",\n", " title=\"Final response:\",\n", " description=\"Write the final version of the response, making sure that it matches the task category and makes sense for the instruction (and input) provided. If the original response is ok, copy and paste it here.\",\n", - " required=True\n", - " )\n", + " required=True,\n", + " ),\n", "]\n", "\n", "guidelines = \"In this dataset, you will find a collection of records that show a category, an instruction, an input and a response to that instruction. The aim of the project is to correct the instructions, input and responses to make sure they are of the highest quality and that they match the task category that they belong to. All three texts should be clear and include real information. In addition, the response should be as complete but concise as possible.\\n\\nTo curate the dataset, you will need to provide an answer to the following text fields:\\n\\n1 - Final instruction:\\nThe final version of the instruction field. You may copy it using the copy icon in the instruction field. Leave it as it is if it's ok or apply any necessary corrections. Remember to change the instruction if it doesn't represent well the task category of the record.\\n\\n2 - Final input:\\nThe final version of the instruction field. You may copy it using the copy icon in the input field. Leave it as it is if it's ok or apply any necessary corrections. If the task category and instruction don't need of an input to be completed, leave this question blank.\\n\\n3 - Final response:\\nThe final version of the response field. You may copy it using the copy icon in the response field. Leave it as it is if it's ok or apply any necessary corrections. Check that the response makes sense given all the fields above.\\n\\nYou will need to provide at least an instruction and a response for all records. 
If you are not sure about a record and you prefer not to provide a response, click Discard.\"" @@ -504,7 +515,7 @@ "metadata": {}, "outputs": [], "source": [ - "users = [user for user in rg.User.list() if user.role =='annotator']" + "users = [user for user in rg.User.list() if user.role == \"annotator\"]" ] }, { @@ -530,28 +541,26 @@ "# divide your records in chunks of the same length as the users list and make the assignments\n", "# you will need to follow the instructions to create and push a dataset for each of the key-value pairs in this dictionary\n", "n = len(users)\n", - "chunked_records = [records[i:i + n] for i in range(0, len(records), n)]\n", + "chunked_records = [records[i : i + n] for i in range(0, len(records), n)]\n", "for chunk in chunked_records:\n", " for idx, record in enumerate(chunk):\n", " assignments[users[idx].username].append(record)\n", "\n", - "for username,records in assignments.items():\n", + "for username, records in assignments.items():\n", " # check that the user has a personal workspace and create it if not\n", " try:\n", " workspace = rg.Workspace.from_name(username)\n", - " except: \n", + " except:\n", " workspace = rg.Workspace.create(username)\n", " user = rg.User.from_name(username)\n", " workspace.add_user(user.id)\n", "\n", " # create a dataset for each annotator and push it to their personal workspace\n", " dataset = rg.FeedbackDataset(\n", - " guidelines=guidelines,\n", - " fields=fields,\n", - " questions=questions\n", + " guidelines=guidelines, fields=fields, questions=questions\n", " )\n", " dataset.add_records(records)\n", - " dataset.push_to_argilla(name='curate_dolly', workspace=workspace.name)" + " dataset.push_to_argilla(name=\"curate_dolly\", workspace=workspace.name)" ] }, { @@ -571,7 +580,7 @@ "source": [ "feedback = []\n", "for username in assignments.keys():\n", - " feedback.extend(rg.FeedbackDataset.from_argilla('curate_dolly', workspace=username))" + " 
feedback.extend(rg.FeedbackDataset.from_argilla(\"curate_dolly\", workspace=username))" ] }, { @@ -593,25 +602,31 @@ "for record in feedback:\n", " if record.responses is None or len(record.responses) == 0:\n", " continue\n", - " \n", + "\n", " # we should only have 1 response per record, so we can safely use the first one only\n", " response = record.responses[0]\n", "\n", - " if response.status != 'submitted':\n", + " if response.status != \"submitted\":\n", " changes = []\n", " else:\n", " changes = []\n", - " if response.values['new-instruction'].value != record.fields['instruction']:\n", - " changes.append('instruction')\n", - " if response.values['new-input'].value != record.fields['context']:\n", - " changes.append('input')\n", - " if response.values['new-response'].value != record.fields['response']:\n", - " changes.append('response')\n", - "\n", - " responses.append({'status': response.status, 'category': record.fields['category'], 'changes': ','.join(changes)})\n", + " if response.values[\"new-instruction\"].value != record.fields[\"instruction\"]:\n", + " changes.append(\"instruction\")\n", + " if response.values[\"new-input\"].value != record.fields[\"context\"]:\n", + " changes.append(\"input\")\n", + " if response.values[\"new-response\"].value != record.fields[\"response\"]:\n", + " changes.append(\"response\")\n", + "\n", + " responses.append(\n", + " {\n", + " \"status\": response.status,\n", + " \"category\": record.fields[\"category\"],\n", + " \"changes\": \",\".join(changes),\n", + " }\n", + " )\n", "\n", "responses_df = pd.DataFrame(responses)\n", - "responses_df = responses_df.replace('', 'None')" + "responses_df = responses_df.replace(\"\", \"None\")" ] }, { @@ -620,7 +635,7 @@ "metadata": {}, "outputs": [], "source": [ - "fig = px.histogram(responses_df, x='status')\n", + "fig = px.histogram(responses_df, x=\"status\")\n", "fig.show()" ] }, @@ -650,8 +665,8 @@ "metadata": {}, "outputs": [], "source": [ - "fig = 
px.histogram(responses_df.loc[responses_df['status']=='submitted'], x='changes')\n", - "fig.update_xaxes(categoryorder='total descending')\n", + "fig = px.histogram(responses_df.loc[responses_df[\"status\"] == \"submitted\"], x=\"changes\")\n", + "fig.update_xaxes(categoryorder=\"total descending\")\n", "fig.update_layout(bargap=0.2)\n", "fig.show()" ] @@ -693,13 +708,13 @@ " # we should only have 1 response per record, so we can safely use the first one only\n", " response = record.responses[0]\n", " # we will skip records where our annotators didn't submit their feedback\n", - " if response.status != 'submitted':\n", + " if response.status != \"submitted\":\n", " continue\n", "\n", - " record.fields['instruction'] = response.values['new-instruction'].value\n", - " record.fields['context'] = response.values['new-input'].value\n", - " record.fields['response'] = response.values['new-response'].value\n", - " \n", + " record.fields[\"instruction\"] = response.values[\"new-instruction\"].value\n", + " record.fields[\"context\"] = response.values[\"new-input\"].value\n", + " record.fields[\"response\"] = response.values[\"new-response\"].value\n", + "\n", " new_records.append(record.fields)" ] }, @@ -930,7 +945,7 @@ "metadata": {}, "outputs": [], "source": [ - "#push to hub\n", + "# push to hub\n", "new_dataset = Dataset(new_records)\n", "new_dataset.push_to_hub(\".../curated_databricks-dolly-15k\")" ] diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-metadata-003.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-metadata-003.ipynb index 5724277807..afab07b48e 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-metadata-003.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-metadata-003.ipynb @@ -187,9 +187,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " 
tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -807,19 +810,9 @@ "source": [ "filtered_records = dataset_remote.filter_by(\n", " metadata_filters=[\n", - " rg.TermsMetadataFilter(\n", - " name=\"group\",\n", - " values=[\"group-1\", \"group-2\"]\n", - " ),\n", - " rg.IntegerMetadataFilter(\n", - " name=\"length\",\n", - " le=282\n", - " ),\n", - " rg.FloatMetadataFilter(\n", - " name=\"length_std\",\n", - " ge=204, \n", - " le=290\n", - " ), \n", + " rg.TermsMetadataFilter(name=\"group\", values=[\"group-1\", \"group-2\"]),\n", + " rg.IntegerMetadataFilter(name=\"length\", le=282),\n", + " rg.FloatMetadataFilter(name=\"length_std\", ge=204, le=290),\n", " ]\n", ")\n", "\n", @@ -843,25 +836,15 @@ "\n", "filtered_dataset = dataset_remote.filter_by(\n", " metadata_filters=[\n", - " rg.TermsMetadataFilter(\n", - " name=\"group\",\n", - " values=[\"group-1\", \"group-2\"]\n", - " ),\n", - " rg.IntegerMetadataFilter(\n", - " name=\"length\",\n", - " le=282\n", - " ),\n", - " rg.FloatMetadataFilter(\n", - " name=\"length_std\",\n", - " ge=204, \n", - " le=290\n", - " ),\n", + " rg.TermsMetadataFilter(name=\"group\", values=[\"group-1\", \"group-2\"]),\n", + " rg.IntegerMetadataFilter(name=\"length\", le=282),\n", + " rg.FloatMetadataFilter(name=\"length_std\", ge=204, le=290),\n", " ],\n", - " response_status=[\"discarded\"]\n", + " response_status=[\"discarded\"],\n", ").sort_by(\n", " [\n", " SortBy(field=\"updated_at\", order=\"desc\"),\n", - " SortBy(field=\"metadata.group\", order=\"asc\")\n", + " SortBy(field=\"metadata.group\", order=\"asc\"),\n", " ]\n", ")" ] @@ -892,7 +875,9 @@ "outputs": [], "source": [ "try:\n", - " remote_dataset = 
rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_metadata\")\n", + " remote_dataset = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_metadata\"\n", + " )\n", " remote_dataset.delete()\n", "except:\n", " pass\n", @@ -969,8 +954,10 @@ }, "outputs": [], "source": [ - "#papermill_description=push-dataset-to-huggingface \n", - "dataset.push_to_huggingface(\"argilla/end2end_textclassification_with_metadata\", generate_card=True)" + "# papermill_description=push-dataset-to-huggingface\n", + "dataset.push_to_huggingface(\n", + " \"argilla/end2end_textclassification_with_metadata\", generate_card=True\n", + ")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-suggestions-and-responses-005.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-suggestions-and-responses-005.ipynb index 5543e317eb..c301649f0a 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-suggestions-and-responses-005.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-suggestions-and-responses-005.ipynb @@ -108,8 +108,8 @@ "outputs": [], "source": [ "# Argilla credentials\n", - "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", - "api_key = DEFAULT_API_KEY # admin.apikey\n", + "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", + "api_key = DEFAULT_API_KEY # admin.apikey\n", "# Huggingface credentials\n", "hf_token = \"hf_...\"" ] @@ -147,9 +147,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -294,8 +297,8 @@ "for index, record in enumerate(dataset.records):\n", " record.responses = [\n", " {\n", - " \"values\":{\n", - " \"label\":{\n", + " \"values\": {\n", + " \"label\": {\n", " \"value\": mapped_labels[index],\n", " }\n", " }\n", @@ -323,8 +326,8 @@ "for index, record in enumerate(modified_records):\n", " record.responses = [\n", " {\n", - " \"values\":{\n", - " \"label\":{\n", + " \"values\": {\n", + " \"label\": {\n", " \"value\": mapped_labels[index],\n", " }\n", " },\n", @@ -383,7 +386,7 @@ " {\n", " \"question_name\": \"label\",\n", " \"value\": classifier(record.fields[\"text\"], candidate_labels)[\"labels\"][0],\n", - " \"agent\": model_name \n", + " \"agent\": model_name,\n", " }\n", " ]" ] @@ -410,7 +413,7 @@ " {\n", " \"question_name\": \"label\",\n", " \"value\": classifier(record.fields[\"text\"], candidate_labels)[\"labels\"][0],\n", - " \"agent\": model_name \n", + " \"agent\": model_name,\n", " }\n", " ]\n", "\n", @@ -444,11 +447,15 @@ "outputs": [], "source": [ "try:\n", - " remote_dataset = rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_suggestions_and_responses\")\n", + " remote_dataset = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_suggestions_and_responses\"\n", + " )\n", " remote_dataset.delete()\n", "except Exception:\n", " pass\n", - "remote_dataset = dataset.push_to_argilla(\"end2end_textclassification_with_suggestions_and_responses\")" + "remote_dataset = dataset.push_to_argilla(\n", + " \"end2end_textclassification_with_suggestions_and_responses\"\n", + ")" ] }, { @@ -501,8 +508,11 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=push-dataset-to-huggingface\n", - "dataset.push_to_huggingface(\"argilla/end2end_textclassification_with_suggestions_and_responses\", generate_card=True)" + "# papermill_description=push-dataset-to-huggingface\n", + "dataset.push_to_huggingface(\n", + " 
\"argilla/end2end_textclassification_with_suggestions_and_responses\",\n", + " generate_card=True,\n", + ")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-vectors-004.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-vectors-004.ipynb index f882042088..2c6d680793 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-vectors-004.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/add-vectors-004.ipynb @@ -118,8 +118,8 @@ "outputs": [], "source": [ "# Argilla credentials\n", - "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", - "api_key = DEFAULT_API_KEY # admin.apikey\n", + "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", + "api_key = DEFAULT_API_KEY # admin.apikey\n", "# Huggingface credentials\n", "hf_token = \"hf_...\"" ] @@ -137,10 +137,7 @@ "metadata": {}, "outputs": [], "source": [ - "rg.init(\n", - " api_url=api_url,\n", - " api_key=api_key\n", - ")" + "rg.init(api_url=api_url, api_key=api_key)" ] }, { @@ -160,9 +157,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -261,9 +261,7 @@ "outputs": [], "source": [ "vector_settings = rg.VectorSettings(\n", - " name=\"sentence_embedding\",\n", - " title=\"Sentence Embedding\",\n", - " dimensions=384\n", + " name=\"sentence_embedding\", title=\"Sentence Embedding\", dimensions=384\n", ")" ] }, @@ -415,9 +413,7 @@ "outputs": [], "source": [ "similar_records = dataset_remote.find_similar_records(\n", - " vector_name=\"sentence_embedding\",\n", - " record=dataset_remote[0],\n", - " max_results=5\n", + " vector_name=\"sentence_embedding\", record=dataset_remote[0], max_results=5\n", ")" ] }, @@ -470,9 +466,7 @@ "text_vector = model.encode(text).tolist()\n", "\n", "similar_records = dataset_remote.find_similar_records(\n", - " vector_name=\"sentence_embedding\",\n", - " value=text_vector,\n", - " max_results=5\n", + " vector_name=\"sentence_embedding\", value=text_vector, max_results=5\n", ")" ] }, @@ -523,7 +517,9 @@ "outputs": [], "source": [ "try:\n", - " remote_dataset = rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_vectors\")\n", + " remote_dataset = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_vectors\"\n", + " )\n", " remote_dataset.delete()\n", "except:\n", " pass\n", @@ -581,8 +577,10 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=push-dataset-to-huggingface\n", - "dataset.push_to_huggingface(\"argilla/end2end_textclassification_with_vectors\", generate_card=True)" + "# papermill_description=push-dataset-to-huggingface\n", + "dataset.push_to_huggingface(\n", + " \"argilla/end2end_textclassification_with_vectors\", generate_card=True\n", + ")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/assign-records-002.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/assign-records-002.ipynb index 512208e6b6..7547913d21 100644 --- 
a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/assign-records-002.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/assign-records-002.ipynb @@ -118,8 +118,8 @@ "outputs": [], "source": [ "# Argilla credentials\n", - "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", - "api_key = DEFAULT_API_KEY # admin.apikey\n", + "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", + "api_key = DEFAULT_API_KEY # admin.apikey\n", "# Huggingface credentials\n", "hf_token = \"hf_...\"" ] @@ -157,9 +157,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -304,7 +307,7 @@ "# Set our teams\n", "groups = {\n", " \"group_a\": [\"emily\", \"carlos\", \"aisha\", \"dimitry\", \"chloe\"],\n", - " \"group_b\": [\"liam\", \"hina\", \"samuel\", \"maya\", \"luca\"]\n", + " \"group_b\": [\"liam\", \"hina\", \"samuel\", \"maya\", \"luca\"],\n", "}" ] }, @@ -316,7 +319,7 @@ "source": [ "# Get the records\n", "records = [record for record in dataset]\n", - "print('Number of records:', len(records))" + "print(\"Number of records:\", len(records))" ] }, { @@ -350,12 +353,7 @@ "source": [ "from argilla.client.feedback.utils import assign_records\n", "\n", - "assignments = assign_records(\n", - " users=groups,\n", - " records=records,\n", - " overlap=1,\n", - " shuffle=True\n", - ")" + "assignments = assign_records(users=groups, records=records, overlap=1, shuffle=True)" ] }, { @@ -397,10 +395,7 @@ "source": [ "from argilla.client.feedback.utils import assign_workspaces\n", "\n", - "wk_group = assign_workspaces(\n", - " assignments=assignments,\n", - " 
workspace_type=\"group\"\n", - ")\n", + "wk_group = assign_workspaces(assignments=assignments, workspace_type=\"group\")\n", "wk_group" ] }, @@ -418,10 +413,14 @@ " assigned_records = next(iter(assignments[group].values()))\n", " dataset.add_records(assigned_records)\n", " try:\n", - " remote_dataset = dataset.from_argilla(name=\"text_classification\", workspace=group)\n", + " remote_dataset = dataset.from_argilla(\n", + " name=\"text_classification\", workspace=group\n", + " )\n", " remote_dataset.delete()\n", " except Exception:\n", - " remote_dataset = dataset.push_to_argilla(name=\"text_classification\", workspace=group)" + " remote_dataset = dataset.push_to_argilla(\n", + " name=\"text_classification\", workspace=group\n", + " )" ] }, { @@ -452,8 +451,7 @@ "from argilla.client.feedback.utils import assign_workspaces\n", "\n", "wk_group_personal = assign_workspaces(\n", - " assignments=assignments,\n", - " workspace_type=\"group_personal\"\n", + " assignments=assignments, workspace_type=\"group_personal\"\n", ")\n", "wk_group_personal" ] @@ -473,10 +471,14 @@ " assigned_records = users[user]\n", " dataset.add_records(assigned_records)\n", " try:\n", - " remote_dataset = dataset.from_argilla(name=\"text_classification\", workspace=user)\n", + " remote_dataset = dataset.from_argilla(\n", + " name=\"text_classification\", workspace=user\n", + " )\n", " remote_dataset.delete()\n", " except Exception:\n", - " remote_dataset = dataset.push_to_argilla(name=\"text_classification\", workspace=user)" + " remote_dataset = dataset.push_to_argilla(\n", + " name=\"text_classification\", workspace=user\n", + " )" ] }, { @@ -556,7 +558,7 @@ "source": [ "# Get only those users with the role \"annotator\"\n", "users = [user for user in rg.User.list() if user.role == \"annotator\"]\n", - "print('Number of users:', len(users))" + "print(\"Number of users:\", len(users))" ] }, { @@ -567,7 +569,7 @@ "source": [ "# Get the records\n", "records = [record for record in dataset]\n", - 
"print('Number of records:', len(records))" + "print(\"Number of records:\", len(records))" ] }, { @@ -599,12 +601,7 @@ "source": [ "from argilla.client.feedback.utils import assign_records\n", "\n", - "assignments = assign_records(\n", - " users=users,\n", - " records=records,\n", - " overlap=2,\n", - " shuffle=True\n", - ")" + "assignments = assign_records(users=users, records=records, overlap=2, shuffle=True)" ] }, { @@ -639,10 +636,7 @@ "source": [ "from argilla.client.feedback.utils import assign_workspaces\n", "\n", - "wk_individual = assign_workspaces(\n", - " assignments=assignments,\n", - " workspace_type=\"individual\"\n", - ")\n", + "wk_individual = assign_workspaces(assignments=assignments, workspace_type=\"individual\")\n", "wk_individual" ] }, @@ -659,10 +653,14 @@ " )\n", " dataset.add_records(records)\n", " try:\n", - " remote_dataset = dataset.from_argilla(name=\"text-classification\", workspace=username)\n", + " remote_dataset = dataset.from_argilla(\n", + " name=\"text-classification\", workspace=username\n", + " )\n", " remote_dataset.delete()\n", - " except Exception: \n", - " remote_dataset = dataset.push_to_argilla(name=\"text-classification\", workspace=username)" + " except Exception:\n", + " remote_dataset = dataset.push_to_argilla(\n", + " name=\"text-classification\", workspace=username\n", + " )" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/configure-users-and-workspaces-000.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/configure-users-and-workspaces-000.ipynb index 276e14f102..fdbdf58dab 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/configure-users-and-workspaces-000.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/configure-users-and-workspaces-000.ipynb @@ -178,9 +178,9 @@ "outputs": [], "source": [ "# Argilla credentials\n", - "api_url = \"http://localhost:6900\" # 
\"https://.hf.space\"\n", - "api_key = DEFAULT_API_KEY # admin.apikey\n", - "hf_token = None # not used in this tutorial" + "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", + "api_key = DEFAULT_API_KEY # admin.apikey\n", + "hf_token = None # not used in this tutorial" ] }, { @@ -207,10 +207,7 @@ }, "outputs": [], "source": [ - "rg.init(\n", - " api_url=api_url,\n", - " api_key=api_key\n", - ")" + "rg.init(api_url=api_url, api_key=api_key)" ] }, { @@ -233,6 +230,7 @@ }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -243,14 +241,18 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb index 11e0ca9ca8..7a92685d5a 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb @@ -1,1145 +1,1156 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "fc0c600b", - "metadata": { - "papermill": { - "duration": 0.046655, - "end_time": "2023-11-15T10:59:17.308341", - "exception": false, - "start_time": "2023-11-15T10:59:17.261686", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Creating a `FeedbackDataset`\n", - "\n", - "This tutorial is part of a series in which we will get to know the `FeedbackDataset`. Before starting this tutorial, you need to do the tutorial on [configuring users and workspaces](./configure-users-and-workspaces-000.ipynb). In this step, we will show how to configure a `FeedbackDataset` and add `FeedbackRecords` to it. If you need additional context, consult [our practical guide on creating a dataset](../../../../practical_guides/create_update_dataset/create_dataset.md).\n", - "\n", - "![workflow](../../../../_static/tutorials/end2end/base/workflow_create_dataset.svg)\n", - "\n", - "We will start by creating a basic dataset using the [ag_news](https://huggingface.co/datasets/ag_news) dataset as an example and push it to `Argilla` and the Hugging Face `hub`." 
- ] - }, - { - "cell_type": "markdown", - "id": "e557a44e-26fb-4b52-91ec-2b49d550a5cd", - "metadata": { - "editable": true, - "papermill": { - "duration": 0.011949, - "end_time": "2023-11-15T10:59:17.340523", - "exception": false, - "start_time": "2023-11-15T10:59:17.328574", - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## Table of Contents\n", - "\n", - "1. [Configure a FeedbackDataset](#Configure-a-FeedbackDataset)\n", - "2. [Add FeedbackRecords](#Add-FeedbackRecords)\n", - " 1. [From a Hugging Face dataset](#From-a-Hugging-Face-dataset)\n", - " 2. [From a pandas.DataFrame](#From-a-pandas.DataFrame)\n", - "3. [Save and load a FeedbackDataset](#Save-and-load-a-FeedbackDataset)\n", - " 1. [Push our FeedbackDataset to Argilla](#From-Argilla)\n", - " 2. [Push our FeedbackDataset to the Hugging Face hub](#From-Hugging-Face-hub)\n", - "4. [Conclusion](#Conclusion)" - ] - }, - { - "cell_type": "markdown", - "id": "566b7b82", - "metadata": { - "papermill": { - "duration": 0.007744, - "end_time": "2023-11-15T10:59:17.355040", - "exception": false, - "start_time": "2023-11-15T10:59:17.347296", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Running Argilla\n", - "\n", - "For this tutorial, you will need to have an Argilla server running. 
There are two main options for deploying and running Argilla:\n", - "\n", - "**Deploy Argilla on Hugging Face Spaces:** If you want to run tutorials with external notebooks (e.g., Google Colab) and you have an account on Hugging Face, you can deploy Argilla on Spaces with a few clicks:\n", - "\n", - "[![deploy on spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-lg.svg)](https://huggingface.co/new-space?template=argilla/argilla-template-space)\n", - "\n", - "For details about configuring your deployment, check the [official Hugging Face Hub guide](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla).\n", - "\n", - "**Launch Argilla using Argilla's quickstart Docker image**: This is the recommended option if you want [Argilla running on your local machine](../../../../getting_started/quickstart.md). Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", - "\n", - "For more information on deployment options, please check the Deployment section of the documentation.\n", - "\n", - "
\n", - "\n", - "Tip\n", - "\n", - "This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.004557, - "end_time": "2023-11-15T10:59:17.364909", - "exception": false, - "start_time": "2023-11-15T10:59:17.360352", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "First let's install our dependencies and import the necessary libraries:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bbbd3c38-1e3c-4d7f-97e6-22dd3b668fc8", - "metadata": { - "editable": true, - "papermill": { - "duration": 3.857186, - "end_time": "2023-11-15T10:59:21.228277", - "exception": false, - "start_time": "2023-11-15T10:59:17.371091", - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "!pip install argilla\n", - "!pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "51b3b93c", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "import argilla as rg\n", - "from argilla._constants import DEFAULT_API_KEY\n", - "from datasets import load_dataset" - ] - }, - { - "cell_type": "markdown", - "id": "c330bed5-38d1-45bf-b871-98e629ab3af8", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "In order to run this notebook we will need some credentials to push and load datasets from `Argilla` and 🤗`hub`, let's set them in the following cell:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ca4fd7ae-7e31-405e-84c1-974828a903bd", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - 
"source": [ - "# Argilla credentials\n", - "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", - "api_key = DEFAULT_API_KEY # admin.apikey\n", - "# Huggingface credentials\n", - "hf_token = \"hf_...\"" - ] - }, - { - "cell_type": "markdown", - "id": "3464037d-e21e-4db4-bfb4-e300fe4c9e53", - "metadata": {}, - "source": [ - "Log to argilla:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "728e6af7", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#papermill_description=logging-to-argilla\n", - "rg.init(\n", - " api_url=api_url,\n", - " api_key=api_key\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Enable Telemetry\n", - "\n", - "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../../../reference/telemetry.md) page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " from argilla.utils.telemetry import tutorial_running\n", - " tutorial_running()\n", - "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\")" - ] - }, - { - "cell_type": "markdown", - "id": "f50177aa-4c94-4b8f-8293-a3710e3cb0ca", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Configure a `FeedbackDataset` " - ] - }, - { - "cell_type": "markdown", - "id": "529f4e77-6c05-4d59-b153-170acee36b97", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "For this tutorial we will use the [ag_news](https://huggingface.co/datasets/ag_news) dataset which can be downloaded from the 🤗`hub`. We will load only the first 1000 items from the training sample." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8b01b473-a815-427c-986e-6c2bedf1b5d7", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Dataset({\n", - " features: ['text', 'label'],\n", - " num_rows: 1000\n", - "})" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = load_dataset(\"ag_news\", split=\"train[:1000]\")\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "5b4eb4ff-9530-4542-9975-25d0f4f725c1", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "We will just load the first 1000 records for this tutorial, but feel free to test the full dataset." 
- ] - }, - { - "cell_type": "markdown", - "id": "6d2e7997-62c6-49b3-9462-acd6143346a1", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "This dataset contains a collection of news articles (we can see the content in the `text` column), which have been asigned one of the following classification `labels`: *World (0), Sports (1), Business (2), Sci/Tech (3)*.\n", - "\n", - "Let's use the [task templates](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#task-templates) to create a feedback dataset ready for `text-classification`." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "7294b709", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "FeedbackDataset(\n", - " fields=[TextField(name='text', title='Text', required=True, type='text', use_markdown=False)]\n", - " questions=[LabelQuestion(name='label', title='Label', description='Classify the text by selecting the correct label from the given list of labels.', required=True, type='label_selection', labels=['World', 'Sports', 'Business', 'Sci/Tech'], visible_labels=None)]\n", - " guidelines=Classify the articles into one of the four categories.)\n", - " metadata_properties=[])\n", - ")" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "feedback_dataset = rg.FeedbackDataset.for_text_classification(\n", - " labels=[\"World\", \"Sports\", \"Business\", \"Sci/Tech\"],\n", - " guidelines=\"Classify the articles into one of the four categories.\",\n", - ")\n", - "feedback_dataset" - ] - }, - { - "cell_type": "markdown", - "id": "5a5238ea", - 
"metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "We could compare this dataset with the custom configuration we would use previously (we can take a look at the [custom configuration](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#custom-configuration) for more information on the creation of a `FeedbackDataset` when we want a finer control):" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f4823931-39a2-4942-ba83-894d62e0b7cc", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "FeedbackDataset(\n", - " fields=[TextField(name='text', title='Text from the article', required=True, type='text', use_markdown=False)]\n", - " questions=[LabelQuestion(name='label', title='In which category does this article fit?', description=None, required=True, type='label_selection', labels={'World': '0', 'Sports': '1', 'Business': '2', 'Sci/Tech': '3'}, visible_labels=None)]\n", - " guidelines=Classify the articles into one of the four categories.)\n", - " metadata_properties=[])\n", - ")" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "feedback_dataset_long = rg.FeedbackDataset(\n", - " guidelines=\"Classify the articles into one of the four categories.\",\n", - " fields=[\n", - " rg.TextField(name=\"text\", title=\"Text from the article\"),\n", - " ],\n", - " questions=[\n", - " rg.LabelQuestion(\n", - " name=\"label\",\n", - " title=\"In which category does this article fit?\",\n", - " labels={\"World\": \"0\", \"Sports\": \"1\", \"Business\": \"2\", \"Sci/Tech\": \"3\"},\n", - " required=True,\n", - " 
visible_labels=None\n", - " )\n", - " ]\n", - ")\n", - "feedback_dataset_long" - ] - }, - { - "cell_type": "markdown", - "id": "77ad10bc-d908-41ba-9a5c-c9cc9d56b67f", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Add `FeedbackRecords` \n", - "\n", - "### From a Hugging Face `dataset` " - ] - }, - { - "cell_type": "markdown", - "id": "9c194d68-6a87-4bc0-9d4e-f2afdb0561d9", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "The next step once we have our `FeedbackDataset` created is adding the [FeedbackRecords](https://docs.v1.argilla.io/en/latest/getting_started/cheatsheet.html#create-records) to it." - ] - }, - { - "cell_type": "markdown", - "id": "5110f19b-8bf2-4d48-8a1f-ffd89cfa20f5", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "In order to create our records we can just loop over the items in the `datasets.Dataset`." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "825342c7-ef43-4747-8523-18f3e3fc6bd5", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "records = []\n", - "for i, item in enumerate(ds):\n", - " records.append(\n", - " rg.FeedbackRecord(\n", - " fields={\n", - " \"text\": item[\"text\"],\n", - " },\n", - " external_id=f\"record-{i}\"\n", - " )\n", - " )\n", - "\n", - "# We can add an external_id to each record to identify it later." 
- ] - }, - { - "cell_type": "markdown", - "id": "3eb1ae7a-01f9-439b-87ac-6d0064164c80", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "### From a `pandas.DataFrame` " - ] - }, - { - "cell_type": "markdown", - "id": "dba19b65-60d3-4507-aca6-1b4b9bc77d08", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "If we had our data in a different format, let's say a `csv` file, maybe it's more direct to read the data using pandas for that.\n", - "\n", - "We will transform our dataset to pandas format for this example, and the remaining `FeedbackRecord` creation remains just the same:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "82beb35b-264f-491a-a6be-bd53f40fb509", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
textlabel
0Wall St. Bears Claw Back Into the Black (Reute...2
1Carlyle Looks Toward Commercial Aerospace (Reu...2
2Oil and Economy Cloud Stocks' Outlook (Reuters...2
3Iraq Halts Oil Exports from Main Southern Pipe...2
4Oil prices soar to all-time record, posing new...2
\n", - "
" - ], - "text/plain": [ - " text label\n", - "0 Wall St. Bears Claw Back Into the Black (Reute... 2\n", - "1 Carlyle Looks Toward Commercial Aerospace (Reu... 2\n", - "2 Oil and Economy Cloud Stocks' Outlook (Reuters... 2\n", - "3 Iraq Halts Oil Exports from Main Southern Pipe... 2\n", - "4 Oil prices soar to all-time record, posing new... 2" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_dataset = ds.to_pandas()\n", - "df_dataset.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7b33af-297c-44ab-981c-b62cf1406843", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "records_pandas = []\n", - "for i, item in df_dataset.iterrows():\n", - " records_pandas.append(\n", - " rg.FeedbackRecord(\n", - " fields={\n", - " \"text\": item[\"text\"],\n", - " },\n", - " external_id=f\"record-{i}\"\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "dd3f1c38-3b56-46e8-8cac-7b3d21beb38d", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Let's add our records to the dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "9bd0ec1c-1fcb-4d4f-a0a2-4f581a670e8b", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "feedback_dataset.add_records(records)" - ] - }, - { - "cell_type": "markdown", - "id": "16fe9266-7a03-4be6-8789-b141854bcdda", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": 
"completed" - }, - "tags": [] - }, - "source": [ - "By now we have our dataset with the texts ready to be labeled, let's push it to `Argilla`." - ] - }, - { - "cell_type": "markdown", - "id": "b867eb5b-0b5a-42b1-88f2-f5b67519243e", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Save and load a `FeedbackDataset` \n", - "\n", - "### From `Argilla` " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8f5581dc-8509-484e-ad4e-c73d15227cc9", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#papermill_description=push-dataset-to-argilla\n", - "try:\n", - " # delete old dataset\n", - " remote_dataset = feedback_dataset.from_argilla(name=\"end2end_textclassification\", workspace=\"argilla\")\n", - " remote_dataset.delete()\n", - "except:\n", - " pass\n", - "remote_dataset = feedback_dataset.push_to_argilla(name=\"end2end_textclassification\", workspace=\"argilla\")" - ] - }, - { - "cell_type": "markdown", - "id": "aff0f7fd-722f-4f92-912d-029b134848f3", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "If we go to our `Argilla` instance we should see a similar screen like the following." 
- ] - }, - { - "cell_type": "markdown", - "id": "827fce93", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![feedback-dataset](../../../../_static/tutorials/end2end/text-classification/feedback-dataset-text-classification-1.png)" - ] - }, - { - "cell_type": "markdown", - "id": "e5f09e2a-e192-4526-a686-b4d516ca5066", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Where we can see the *Text from the article* we wanted, and the different labels to choose from." - ] - }, - { - "cell_type": "markdown", - "id": "fd877169-bd9a-4596-981d-47d2ff4c7c24", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "We can now download the dataset from `Argilla` just to check it:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "fbae2025-c4fe-4e0c-b445-9ee308baa4a5", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "RemoteFeedbackDataset(\n", - " id=52b0dfc2-ed85-4805-923c-5d51b51ec4c9\n", - " name=end2end_textclassification\n", - " workspace=Workspace(id=ce760ed7-0fdf-4d79-b9b7-1c0e4ea896cd, name=argilla, inserted_at=2023-11-23 09:46:05.591993, updated_at=2023-11-23 09:46:05.591993)\n", - " url=http://localhost:6900/dataset/52b0dfc2-ed85-4805-923c-5d51b51ec4c9/annotation-mode\n", - " fields=[RemoteTextField(id=UUID('2835bf0e-1259-45b9-a97c-f9b671395563'), client=None, name='text', title='Text', required=True, type='text', use_markdown=False)]\n", - " 
questions=[RemoteLabelQuestion(id=UUID('bb6fc4f0-e4b7-480c-84a1-df717de4ac97'), client=None, name='label', title='Label', description=None, required=True, type='label_selection', labels=['World', 'Sports', 'Business', 'Sci/Tech'], visible_labels=None)]\n", - " guidelines=Classify the articles into one of the four categories.\n", - " metadata_properties=[]\n", - ")" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_dataset = rg.FeedbackDataset.from_argilla(\"end2end_textclassification\", workspace=\"argilla\")\n", - "remote_dataset" - ] - }, - { - "cell_type": "markdown", - "id": "1f16d868-a9df-4f01-85ac-06bfcee19101", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "### From Hugging Face hub \n", - "\n", - "If we wanted to share our dataset with the world, we could use the Huggingface hub for it." - ] - }, - { - "cell_type": "markdown", - "id": "24aae42a-6b0f-40eb-9d92-4aafcb735844", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "First we need to login to huggingface. The following cell will log us with our previous token.\n", - "\n", - "If we don't have one already, we can obtain it from [here](https://huggingface.co/docs/hub/security-tokens) (remember to set the *write* access)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fabcc89-323f-4f4f-9ab9-4b778dd93474", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from huggingface_hub import login\n", - "\n", - "login(token=hf_token)" - ] - }, - { - "cell_type": "markdown", - "id": "694c381d-7f7d-4b2d-938a-20df260ec957", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "And now we can just call the method on the `FeedbackDataset`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c471559-916c-4bf8-b115-3d1ba0be621b", - "metadata": { - "editable": true, - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#papermill_description=push-dataset-to-huggingface\n", - "remote_dataset.push_to_huggingface(\"argilla/end2end_textclassification\")" - ] - }, - { - "cell_type": "markdown", - "id": "761250bc", - "metadata": {}, - "source": [ - "We can now download the dataset from Hugging Face just to check it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5405ba2", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "local_dataset = rg.FeedbackDataset.from_huggingface(\"argilla/end2end_textclassification\")" - ] - }, - { - "cell_type": "markdown", - "id": "408cb3c9-c784-45c8-b206-058a23b801d8", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - 
"source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "id": "6cdb1244-1589-4e85-be36-e0c959817c6d", - "metadata": { - "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "completed" - }, - "tags": [] - }, - "source": [ - "In this tutorial we created an `Argilla` `FeedbackDataset` for text classification, starting from [ag_news](https://huggingface.co/datasets/ag_news).\n", - "\n", - "We created a `FeedbackDataset` for text classification with a `LabelQuestion`, from data stored as a `datasets.Dataset` and a `pandas.DataFrame`.\n", - "This dataset was pushed both to `Argilla` where we can curate and label the records, and finally pushed it to the 🤗`hub`.\n", - "\n", - "To learn more about how to work with the `FeedbackDataset` check the [cheatsheet](https://docs.v1.argilla.io/en/latest/getting_started/cheatsheet.html#cheatsheet). To continue with assigning records to annotators, you can refer to the [next tutorial](./assign-records-002.ipynb)." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "papermill": { - "default_parameters": {}, - "duration": 5.95317, - "end_time": "2023-11-15T10:59:21.949504", - "environment_variables": {}, - "exception": null, - "input_path": "/home/agustin/github_repos/argilla-io/argilla/docs/_source/practical_guides/examples/text_classification/text-classification-create-dataset.ipynb", - "output_path": "/home/agustin/github_repos/argilla-io/argilla/docs/_source/practical_guides/examples/text_classification/text-classification-create-dataset.ipynb", - "parameters": {}, - "start_time": "2023-11-15T10:59:15.996334", - "version": "2.5.0" - } + "cells": [ + { + "cell_type": "markdown", + "id": "fc0c600b", + "metadata": { + "papermill": { + "duration": 0.046655, + "end_time": "2023-11-15T10:59:17.308341", + "exception": false, + "start_time": "2023-11-15T10:59:17.261686", + "status": "completed" }, - "nbformat": 4, - "nbformat_minor": 5 + "tags": [] + }, + "source": [ + "# Creating a `FeedbackDataset`\n", + "\n", + "This tutorial is part of a series in which we will get to know the `FeedbackDataset`. Before starting this tutorial, you need to do the tutorial on [configuring users and workspaces](./configure-users-and-workspaces-000.ipynb). In this step, we will show how to configure a `FeedbackDataset` and add `FeedbackRecords` to it. 
If you need additional context, consult [our practical guide on creating a dataset](../../../../practical_guides/create_update_dataset/create_dataset.md).\n", + "\n", + "![workflow](../../../../_static/tutorials/end2end/base/workflow_create_dataset.svg)\n", + "\n", + "We will start by creating a basic dataset using the [ag_news](https://huggingface.co/datasets/ag_news) dataset as an example and push it to `Argilla` and the Hugging Face `hub`." + ] + }, + { + "cell_type": "markdown", + "id": "e557a44e-26fb-4b52-91ec-2b49d550a5cd", + "metadata": { + "editable": true, + "papermill": { + "duration": 0.011949, + "end_time": "2023-11-15T10:59:17.340523", + "exception": false, + "start_time": "2023-11-15T10:59:17.328574", + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Table of Contents\n", + "\n", + "1. [Configure a FeedbackDataset](#Configure-a-FeedbackDataset)\n", + "2. [Add FeedbackRecords](#Add-FeedbackRecords)\n", + " 1. [From a Hugging Face dataset](#From-a-Hugging-Face-dataset)\n", + " 2. [From a pandas.DataFrame](#From-a-pandas.DataFrame)\n", + "3. [Save and load a FeedbackDataset](#Save-and-load-a-FeedbackDataset)\n", + " 1. [Push our FeedbackDataset to Argilla](#From-Argilla)\n", + " 2. [Push our FeedbackDataset to the Hugging Face hub](#From-Hugging-Face-hub)\n", + "4. [Conclusion](#Conclusion)" + ] + }, + { + "cell_type": "markdown", + "id": "566b7b82", + "metadata": { + "papermill": { + "duration": 0.007744, + "end_time": "2023-11-15T10:59:17.355040", + "exception": false, + "start_time": "2023-11-15T10:59:17.347296", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Running Argilla\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. 
There are two main options for deploying and running Argilla:\n", + "\n", + "**Deploy Argilla on Hugging Face Spaces:** If you want to run tutorials with external notebooks (e.g., Google Colab) and you have an account on Hugging Face, you can deploy Argilla on Spaces with a few clicks:\n", + "\n", + "[![deploy on spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-lg.svg)](https://huggingface.co/new-space?template=argilla/argilla-template-space)\n", + "\n", + "For details about configuring your deployment, check the [official Hugging Face Hub guide](https://huggingface.co/docs/hub/spaces-sdks-docker-argilla).\n", + "\n", + "**Launch Argilla using Argilla's quickstart Docker image**: This is the recommended option if you want [Argilla running on your local machine](../../../../getting_started/quickstart.md). Note that this option will only let you run the tutorial locally and not with an external notebook service.\n", + "\n", + "For more information on deployment options, please check the Deployment section of the documentation.\n", + "\n", + "
\n", + "\n", + "Tip\n", + "\n", + "This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", + "metadata": { + "papermill": { + "duration": 0.004557, + "end_time": "2023-11-15T10:59:17.364909", + "exception": false, + "start_time": "2023-11-15T10:59:17.360352", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "First let's install our dependencies and import the necessary libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbbd3c38-1e3c-4d7f-97e6-22dd3b668fc8", + "metadata": { + "editable": true, + "papermill": { + "duration": 3.857186, + "end_time": "2023-11-15T10:59:21.228277", + "exception": false, + "start_time": "2023-11-15T10:59:17.371091", + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!pip install argilla\n", + "!pip install datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "51b3b93c", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import argilla as rg\n", + "from argilla._constants import DEFAULT_API_KEY\n", + "from datasets import load_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "c330bed5-38d1-45bf-b871-98e629ab3af8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "In order to run this notebook we will need some credentials to push and load datasets from `Argilla` and 🤗`hub`, let's set them in the following cell:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ca4fd7ae-7e31-405e-84c1-974828a903bd", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ 
+ "parameters" + ] + }, + "outputs": [], + "source": [ + "# Argilla credentials\n", + "api_url = \"http://localhost:6900\" # \"https://.hf.space\"\n", + "api_key = DEFAULT_API_KEY # admin.apikey\n", + "# Huggingface credentials\n", + "hf_token = \"hf_...\"" + ] + }, + { + "cell_type": "markdown", + "id": "3464037d-e21e-4db4-bfb4-e300fe4c9e53", + "metadata": {}, + "source": [ + "Log to argilla:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "728e6af7", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# papermill_description=logging-to-argilla\n", + "rg.init(api_url=api_url, api_key=api_key)" + ] + }, + { + "cell_type": "markdown", + "id": "acae54e37e7d407bbb7b55eff062a284", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../../../reference/telemetry.md) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a63283cbaf04dbcab1f6479b197f3a8", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "f50177aa-4c94-4b8f-8293-a3710e3cb0ca", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configure a `FeedbackDataset` " + ] + }, + { + "cell_type": "markdown", + "id": "529f4e77-6c05-4d59-b153-170acee36b97", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "For this tutorial we will use the [ag_news](https://huggingface.co/datasets/ag_news) dataset which can be downloaded from the 🤗`hub`. We will load only the first 1000 items from the training sample." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8b01b473-a815-427c-986e-6c2bedf1b5d7", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['text', 'label'],\n", + " num_rows: 1000\n", + "})" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = load_dataset(\"ag_news\", split=\"train[:1000]\")\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "5b4eb4ff-9530-4542-9975-25d0f4f725c1", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "We will just load the first 1000 records for this tutorial, but feel free to test the full dataset." 
+ ] + }, + { + "cell_type": "markdown", + "id": "6d2e7997-62c6-49b3-9462-acd6143346a1", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "This dataset contains a collection of news articles (we can see the content in the `text` column), which have been asigned one of the following classification `labels`: *World (0), Sports (1), Business (2), Sci/Tech (3)*.\n", + "\n", + "Let's use the [task templates](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#task-templates) to create a feedback dataset ready for `text-classification`." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7294b709", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "FeedbackDataset(\n", + " fields=[TextField(name='text', title='Text', required=True, type='text', use_markdown=False)]\n", + " questions=[LabelQuestion(name='label', title='Label', description='Classify the text by selecting the correct label from the given list of labels.', required=True, type='label_selection', labels=['World', 'Sports', 'Business', 'Sci/Tech'], visible_labels=None)]\n", + " guidelines=Classify the articles into one of the four categories.)\n", + " metadata_properties=[])\n", + ")" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feedback_dataset = rg.FeedbackDataset.for_text_classification(\n", + " labels=[\"World\", \"Sports\", \"Business\", \"Sci/Tech\"],\n", + " guidelines=\"Classify the articles into one of the four categories.\",\n", + ")\n", + "feedback_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "5a5238ea", + 
"metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "We could compare this dataset with the custom configuration we would use previously (we can take a look at the [custom configuration](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#custom-configuration) for more information on the creation of a `FeedbackDataset` when we want a finer control):" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f4823931-39a2-4942-ba83-894d62e0b7cc", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "FeedbackDataset(\n", + " fields=[TextField(name='text', title='Text from the article', required=True, type='text', use_markdown=False)]\n", + " questions=[LabelQuestion(name='label', title='In which category does this article fit?', description=None, required=True, type='label_selection', labels={'World': '0', 'Sports': '1', 'Business': '2', 'Sci/Tech': '3'}, visible_labels=None)]\n", + " guidelines=Classify the articles into one of the four categories.)\n", + " metadata_properties=[])\n", + ")" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feedback_dataset_long = rg.FeedbackDataset(\n", + " guidelines=\"Classify the articles into one of the four categories.\",\n", + " fields=[\n", + " rg.TextField(name=\"text\", title=\"Text from the article\"),\n", + " ],\n", + " questions=[\n", + " rg.LabelQuestion(\n", + " name=\"label\",\n", + " title=\"In which category does this article fit?\",\n", + " labels={\"World\": \"0\", \"Sports\": \"1\", \"Business\": \"2\", \"Sci/Tech\": \"3\"},\n", + " required=True,\n", + " 
visible_labels=None,\n", + " )\n", + " ],\n", + ")\n", + "feedback_dataset_long" + ] + }, + { + "cell_type": "markdown", + "id": "77ad10bc-d908-41ba-9a5c-c9cc9d56b67f", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Add `FeedbackRecords` \n", + "\n", + "### From a Hugging Face `dataset` " + ] + }, + { + "cell_type": "markdown", + "id": "9c194d68-6a87-4bc0-9d4e-f2afdb0561d9", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "The next step once we have our `FeedbackDataset` created is adding the [FeedbackRecords](https://docs.v1.argilla.io/en/latest/getting_started/cheatsheet.html#create-records) to it." + ] + }, + { + "cell_type": "markdown", + "id": "5110f19b-8bf2-4d48-8a1f-ffd89cfa20f5", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "In order to create our records we can just loop over the items in the `datasets.Dataset`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "825342c7-ef43-4747-8523-18f3e3fc6bd5", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "records = []\n", + "for i, item in enumerate(ds):\n", + " records.append(\n", + " rg.FeedbackRecord(\n", + " fields={\n", + " \"text\": item[\"text\"],\n", + " },\n", + " external_id=f\"record-{i}\",\n", + " )\n", + " )\n", + "\n", + "# We can add an external_id to each record to identify it later." 
+ ] + }, + { + "cell_type": "markdown", + "id": "3eb1ae7a-01f9-439b-87ac-6d0064164c80", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### From a `pandas.DataFrame` " + ] + }, + { + "cell_type": "markdown", + "id": "dba19b65-60d3-4507-aca6-1b4b9bc77d08", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "If we had our data in a different format, let's say a `csv` file, maybe it's more direct to read the data using pandas for that.\n", + "\n", + "We will transform our dataset to pandas format for this example, and the remaining `FeedbackRecord` creation remains just the same:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "82beb35b-264f-491a-a6be-bd53f40fb509", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textlabel
0Wall St. Bears Claw Back Into the Black (Reute...2
1Carlyle Looks Toward Commercial Aerospace (Reu...2
2Oil and Economy Cloud Stocks' Outlook (Reuters...2
3Iraq Halts Oil Exports from Main Southern Pipe...2
4Oil prices soar to all-time record, posing new...2
\n", + "
" + ], + "text/plain": [ + " text label\n", + "0 Wall St. Bears Claw Back Into the Black (Reute... 2\n", + "1 Carlyle Looks Toward Commercial Aerospace (Reu... 2\n", + "2 Oil and Economy Cloud Stocks' Outlook (Reuters... 2\n", + "3 Iraq Halts Oil Exports from Main Southern Pipe... 2\n", + "4 Oil prices soar to all-time record, posing new... 2" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_dataset = ds.to_pandas()\n", + "df_dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c7b33af-297c-44ab-981c-b62cf1406843", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "records_pandas = []\n", + "for i, item in df_dataset.iterrows():\n", + " records_pandas.append(\n", + " rg.FeedbackRecord(\n", + " fields={\n", + " \"text\": item[\"text\"],\n", + " },\n", + " external_id=f\"record-{i}\",\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "dd3f1c38-3b56-46e8-8cac-7b3d21beb38d", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Let's add our records to the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "9bd0ec1c-1fcb-4d4f-a0a2-4f581a670e8b", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "feedback_dataset.add_records(records)" + ] + }, + { + "cell_type": "markdown", + "id": "16fe9266-7a03-4be6-8789-b141854bcdda", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": 
"completed" + }, + "tags": [] + }, + "source": [ + "By now we have our dataset with the texts ready to be labeled, let's push it to `Argilla`." + ] + }, + { + "cell_type": "markdown", + "id": "b867eb5b-0b5a-42b1-88f2-f5b67519243e", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Save and load a `FeedbackDataset` \n", + "\n", + "### From `Argilla` " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f5581dc-8509-484e-ad4e-c73d15227cc9", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# papermill_description=push-dataset-to-argilla\n", + "try:\n", + " # delete old dataset\n", + " remote_dataset = feedback_dataset.from_argilla(\n", + " name=\"end2end_textclassification\", workspace=\"argilla\"\n", + " )\n", + " remote_dataset.delete()\n", + "except:\n", + " pass\n", + "remote_dataset = feedback_dataset.push_to_argilla(\n", + " name=\"end2end_textclassification\", workspace=\"argilla\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "aff0f7fd-722f-4f92-912d-029b134848f3", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "If we go to our `Argilla` instance we should see a similar screen like the following." 
+ ] + }, + { + "cell_type": "markdown", + "id": "827fce93", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![feedback-dataset](../../../../_static/tutorials/end2end/text-classification/feedback-dataset-text-classification-1.png)" + ] + }, + { + "cell_type": "markdown", + "id": "e5f09e2a-e192-4526-a686-b4d516ca5066", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Where we can see the *Text from the article* we wanted, and the different labels to choose from." + ] + }, + { + "cell_type": "markdown", + "id": "fd877169-bd9a-4596-981d-47d2ff4c7c24", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "We can now download the dataset from `Argilla` just to check it:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "fbae2025-c4fe-4e0c-b445-9ee308baa4a5", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RemoteFeedbackDataset(\n", + " id=52b0dfc2-ed85-4805-923c-5d51b51ec4c9\n", + " name=end2end_textclassification\n", + " workspace=Workspace(id=ce760ed7-0fdf-4d79-b9b7-1c0e4ea896cd, name=argilla, inserted_at=2023-11-23 09:46:05.591993, updated_at=2023-11-23 09:46:05.591993)\n", + " url=http://localhost:6900/dataset/52b0dfc2-ed85-4805-923c-5d51b51ec4c9/annotation-mode\n", + " fields=[RemoteTextField(id=UUID('2835bf0e-1259-45b9-a97c-f9b671395563'), client=None, name='text', title='Text', required=True, type='text', use_markdown=False)]\n", + " 
questions=[RemoteLabelQuestion(id=UUID('bb6fc4f0-e4b7-480c-84a1-df717de4ac97'), client=None, name='label', title='Label', description=None, required=True, type='label_selection', labels=['World', 'Sports', 'Business', 'Sci/Tech'], visible_labels=None)]\n", + " guidelines=Classify the articles into one of the four categories.\n", + " metadata_properties=[]\n", + ")" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_dataset = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification\", workspace=\"argilla\"\n", + ")\n", + "remote_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "1f16d868-a9df-4f01-85ac-06bfcee19101", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### From Hugging Face hub \n", + "\n", + "If we wanted to share our dataset with the world, we could use the Huggingface hub for it." + ] + }, + { + "cell_type": "markdown", + "id": "24aae42a-6b0f-40eb-9d92-4aafcb735844", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "First we need to login to huggingface. The following cell will log us with our previous token.\n", + "\n", + "If we don't have one already, we can obtain it from [here](https://huggingface.co/docs/hub/security-tokens) (remember to set the *write* access)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fabcc89-323f-4f4f-9ab9-4b778dd93474", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from huggingface_hub import login\n", + "\n", + "login(token=hf_token)" + ] + }, + { + "cell_type": "markdown", + "id": "694c381d-7f7d-4b2d-938a-20df260ec957", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "And now we can just call the method on the `FeedbackDataset`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c471559-916c-4bf8-b115-3d1ba0be621b", + "metadata": { + "editable": true, + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# papermill_description=push-dataset-to-huggingface\n", + "remote_dataset.push_to_huggingface(\"argilla/end2end_textclassification\")" + ] + }, + { + "cell_type": "markdown", + "id": "761250bc", + "metadata": {}, + "source": [ + "We can now download the dataset from Hugging Face just to check it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5405ba2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "local_dataset = rg.FeedbackDataset.from_huggingface(\n", + " \"argilla/end2end_textclassification\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "408cb3c9-c784-45c8-b206-058a23b801d8", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + 
"tags": [] + }, + "source": [ + "## Conclusion" + ] + }, + { + "cell_type": "markdown", + "id": "6cdb1244-1589-4e85-be36-e0c959817c6d", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "completed" + }, + "tags": [] + }, + "source": [ + "In this tutorial we created an `Argilla` `FeedbackDataset` for text classification, starting from [ag_news](https://huggingface.co/datasets/ag_news).\n", + "\n", + "We created a `FeedbackDataset` for text classification with a `LabelQuestion`, from data stored as a `datasets.Dataset` and a `pandas.DataFrame`.\n", + "This dataset was pushed both to `Argilla` where we can curate and label the records, and finally pushed it to the 🤗`hub`.\n", + "\n", + "To learn more about how to work with the `FeedbackDataset` check the [cheatsheet](https://docs.v1.argilla.io/en/latest/getting_started/cheatsheet.html#cheatsheet). To continue with assigning records to annotators, you can refer to the [next tutorial](./assign-records-002.ipynb)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "papermill": { + "default_parameters": {}, + "duration": 5.95317, + "end_time": "2023-11-15T10:59:21.949504", + "environment_variables": {}, + "exception": null, + "input_path": "/home/agustin/github_repos/argilla-io/argilla/docs/_source/practical_guides/examples/text_classification/text-classification-create-dataset.ipynb", + "output_path": "/home/agustin/github_repos/argilla-io/argilla/docs/_source/practical_guides/examples/text_classification/text-classification-create-dataset.ipynb", + "parameters": {}, + "start_time": "2023-11-15T10:59:15.996334", + "version": "2.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/filter-and-query-008.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/filter-and-query-008.ipynb index a839262bf7..9cca459466 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/filter-and-query-008.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/filter-and-query-008.ipynb @@ -148,9 +148,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -172,9 +175,15 @@ "metadata": {}, "outputs": [], "source": [ - "dataset_remote_with_metadata = rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_metadata\")\n", - "dataset_remote_with_vectors = rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_vectors\")\n", - "dataset_remote_with_suggestions_and_responses = rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_suggestions_and_responses\")" + "dataset_remote_with_metadata = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_metadata\"\n", + ")\n", + "dataset_remote_with_vectors = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_vectors\"\n", + ")\n", + "dataset_remote_with_suggestions_and_responses = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_suggestions_and_responses\"\n", + ")" ] }, { @@ -296,19 +305,9 @@ "source": [ "filtered_records = dataset_remote_with_metadata.filter_by(\n", " metadata_filters=[\n", - " rg.TermsMetadataFilter(\n", - " name=\"group\",\n", - " values=[\"group-1\", \"group-2\"]\n", - " ),\n", - " rg.IntegerMetadataFilter(\n", - " name=\"length\",\n", - " le=282\n", - " ),\n", - " rg.FloatMetadataFilter(\n", - " name=\"length_std\",\n", - " ge=204, \n", - " le=290\n", - " ), \n", + " rg.TermsMetadataFilter(name=\"group\", values=[\"group-1\", \"group-2\"]),\n", + " rg.IntegerMetadataFilter(name=\"length\", le=282),\n", + " rg.FloatMetadataFilter(name=\"length_std\", ge=204, le=290),\n", " ]\n", ")\n", "\n", @@ -374,7 +373,7 @@ ], "source": [ "filtered_dataset = dataset_remote_with_metadata.filter_by(response_status=[\"submitted\"])\n", - "print('Submitted records:', len(filtered_dataset))" + "print(\"Submitted records:\", len(filtered_dataset))" ] }, { @@ -398,8 +397,10 @@ } ], "source": [ - "filtered_dataset = dataset_remote_with_metadata.filter_by(response_status=[\"missing\", \"draft\"])\n", - "print('Pending records:', 
len(filtered_dataset))" + "filtered_dataset = dataset_remote_with_metadata.filter_by(\n", + " response_status=[\"missing\", \"draft\"]\n", + ")\n", + "print(\"Pending records:\", len(filtered_dataset))" ] }, { @@ -425,24 +426,14 @@ "source": [ "filtered_dataset = dataset_remote_with_metadata.filter_by(\n", " metadata_filters=[\n", - " rg.TermsMetadataFilter(\n", - " name=\"group\",\n", - " values=[\"group-1\", \"group-2\"]\n", - " ),\n", - " rg.IntegerMetadataFilter(\n", - " name=\"length\",\n", - " le=282\n", - " ),\n", - " rg.FloatMetadataFilter(\n", - " name=\"length_std\",\n", - " ge=204, \n", - " le=290\n", - " ),\n", + " rg.TermsMetadataFilter(name=\"group\", values=[\"group-1\", \"group-2\"]),\n", + " rg.IntegerMetadataFilter(name=\"length\", le=282),\n", + " rg.FloatMetadataFilter(name=\"length_std\", ge=204, le=290),\n", " ],\n", - " response_status=[\"discarded\"]\n", + " response_status=[\"discarded\"],\n", ")\n", "\n", - "print('Discarded records:', len(filtered_dataset))" + "print(\"Discarded records:\", len(filtered_dataset))" ] }, { @@ -491,7 +482,7 @@ "sorted_records = dataset_remote_with_suggestions_and_responses.sort_by(\n", " [\n", " SortBy(field=\"updated_at\", order=\"desc\"),\n", - " SortBy(field=\"metadata.group\", order=\"asc\")\n", + " SortBy(field=\"metadata.group\", order=\"asc\"),\n", " ]\n", ")" ] @@ -514,7 +505,7 @@ ").sort_by(\n", " [\n", " SortBy(field=\"updated_at\", order=\"desc\"),\n", - " SortBy(field=\"metadata.group\", order=\"asc\")\n", + " SortBy(field=\"metadata.group\", order=\"asc\"),\n", " ]\n", ")" ] @@ -544,7 +535,7 @@ "similar_records = dataset_remote_with_vectors.find_similar_records(\n", " vector_name=\"sentence_embedding\",\n", " record=dataset_remote_with_vectors[0],\n", - " max_results=5\n", + " max_results=5,\n", ")" ] }, @@ -598,9 +589,7 @@ "text_vector = model.encode(text).tolist()\n", "\n", "similar_records = dataset_remote_with_vectors.find_similar_records(\n", - " vector_name=\"sentence_embedding\",\n", - " 
value=text_vector,\n", - " max_results=5\n", + " vector_name=\"sentence_embedding\", value=text_vector, max_results=5\n", ")" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/train-model-006.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/train-model-006.ipynb index d8616bc246..a87a92f424 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/train-model-006.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/train-model-006.ipynb @@ -148,9 +148,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -172,7 +175,9 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = rg.FeedbackDataset.from_argilla(\"end2end_textclassification_with_suggestions_and_responses\")" + "dataset = rg.FeedbackDataset.from_argilla(\n", + " \"end2end_textclassification_with_suggestions_and_responses\"\n", + ")" ] }, { @@ -190,7 +195,9 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = rg.FeedbackDataset.from_huggingface(\"argilla/end2end_textclassification_with_suggestions_and_responses\")" + "dataset = rg.FeedbackDataset.from_huggingface(\n", + " \"argilla/end2end_textclassification_with_suggestions_and_responses\"\n", + ")" ] }, { @@ -290,9 +297,7 @@ "metadata": {}, "outputs": [], "source": [ - "trainer.update_config(\n", - " max_steps=1\n", - ")" + "trainer.update_config(max_steps=1)" ] }, { @@ -331,7 +336,7 @@ "def formatting_func(sample):\n", " text = sample[\"text\"]\n", " label = sample[\"label\"][0][\"value\"]\n", - " return(text, label)" + " return (text, label)" ] }, { 
@@ -384,9 +389,7 @@ "metadata": {}, "outputs": [], "source": [ - "trainer.update_config(\n", - " max_steps=1\n", - ")" + "trainer.update_config(max_steps=1)" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/use-metrics-007.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/use-metrics-007.ipynb index 0914cbf6fd..80c5f35d8a 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/use-metrics-007.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/use-metrics-007.ipynb @@ -149,9 +149,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry module is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry module is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -179,7 +182,9 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = rg.FeedbackDataset.from_huggingface(\"argilla/go_emotions_raw\", split=\"train[:1000]\")" + "dataset = rg.FeedbackDataset.from_huggingface(\n", + " \"argilla/go_emotions_raw\", split=\"train[:1000]\"\n", + ")" ] }, { @@ -218,7 +223,13 @@ ], "source": [ "print(\"text:\", dataset[5].fields[\"text\"])\n", - "print(\"responses:\", [dataset[5].responses[i].values[\"label\"].value for i in range(len(dataset[5].responses))])" + "print(\n", + " \"responses:\",\n", + " [\n", + " dataset[5].responses[i].values[\"label\"].value\n", + " for i in range(len(dataset[5].responses))\n", + " ],\n", + ")" ] }, { @@ -389,7 +400,9 @@ "metadata": {}, "outputs": [], "source": [ - "model_metrics = dataset.compute_model_metrics(question_name=\"label\", metric_names=metric.allowed_metrics)" + "model_metrics = dataset.compute_model_metrics(\n", + " question_name=\"label\", metric_names=metric.allowed_metrics\n", + ")" ] }, { @@ -445,7 +458,11 @@ "metadata": {}, "outputs": [], "source": [ - "model_metrics_unified = dataset.compute_model_metrics(question_name=\"label\", metric_names=[\"accuracy\", \"precision\", \"recall\", \"f1-score\"], strategy=\"majority\")" + "model_metrics_unified = dataset.compute_model_metrics(\n", + " question_name=\"label\",\n", + " metric_names=[\"accuracy\", \"precision\", \"recall\", \"f1-score\"],\n", + " strategy=\"majority\",\n", + ")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb index e29ccb29df..6196d6e146 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb @@ -1,797 +1,837 @@ { - "cells": [ - { - "attachments": {}, - 
"cell_type": "markdown", - "metadata": { - "id": "FOyRtBFaD8Ti" - }, - "source": [ - "# 🪄 Fine-tuning and evaluating GPT-3.5 with human feedback for RAG \n", - "\n", - "This guide explains how to fine-tune OpenAI's GPT3.5-turbo with your own data and Argilla to improve a RAG (Retrieval Augmented Generation) system. \n", - "\n", - "It includes the following steps:\n", - "\n", - "- Setting up a RAG pipeline using [LlamaIndex](https://github.com/jerryjliu/llama_index) and [Unstructured](https://github.com/Unstructured-IO/unstructured) to answer questions using a document about Argilla Cloud.\n", - "- Generating potential questions with LlamaIndex to build a training and test set.\n", - "- Building a dataset for collecting human written responses with Argilla.\n", - "- Fine-tuning GPT3.5-turbo with high-quality data.\n", - "- Evaluating the fine-tuned model vs. the base model with human preference data from Argilla.\n", - "\n", - "The goal of the tutorial is to demonstrate how to incorporate human feedback into your LLM development for two critical stages: \n", - "\n", - "1. Gathering **high-quality data for fine-tuning**, \n", - "2. Gathering **human feedback for evaluation of LLM applications**.\n", - "\n", - "\n", - "Given the ongoing debate between Retrieval Augmented Generation (RAG) and fine-tuning, we selected a real-world RAG use case to demonstrate how fine-tuning enhances the style, utility, and relevance of responses within a RAG application. The resulting system will be a Hybrid RAG system (RAG using fine-tuned models) as [described in this article](https://towardsdatascience.com/rag-vs-finetuning-which-is-the-best-tool-to-boost-your-llm-application-94654b1eaba7). \n", - "\n", - "The screenshot below displays the evaluation dataset, termed the \"human preference dataset.\" In it, `response-a` is produced by the fine-tuned model, while `response-b` comes from the base GPT-3.5 model. 
With just minor fine-tuning and without altering the system message, we've directed the LLM's behavior towards generating responses that are more helpful, faithful, friendly, and aligned with our brand.\n", - "\n", - "Fine-tuning effectively mitigates common RAG challenges, like the LLM referring to the context using phrases such as \"The context does not provide information about this.\" This enhancement is notable even when we had incorporated directives in the system message to deter such references, like \"2. Avoid phrases such as 'Based on the context, ...' or 'The context information ...'.\" (see Llama Index default prompt later).\n", - "\n", - "You can also browse the [datasets hosted with Argilla Hugging Face Spaces](https://huggingface.co/spaces/argilla/fine-tune-chat-gpt). User and password: argilla / 12345678. The dataset for this stage is `customer-assistant` and for the evaluation step is `finetuned-vs-base-preference`.\n", - "\n", - "\n", - "![Human preference](/_static/images/llms/fine-tuning-openai-rag-feedback/preference-fine-tuned.png \"Fine-tuned model vs base model response\")\n", - "\n", - "\n", - "\n", - "\n", - "By the end of the tutorial, you'll be using a fine-tuned model for RAG and have a human evaluation workflow in place to continuously evaluate your LLM application (see below for a comparison of the base gpt3.5 vs. the fine-tuned gpt3.5 for this application).\n", - "\n", - "![Fine-tuned model vs. base model](/_static/images/llms/fine-tuning-openai-rag-feedback/fine-tune-vs-base.png \"Fine-tuned model vs base model\")\n", - "\n", - "\n", - "Let's get started!\n", - "\n", - "## Setup\n", - "\n", - "To run this tutorial, you need to [install and launch Argilla](https://docs.v1.argilla.io/en/latest/getting_started/quickstart_installation.html), as well as some other packages." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j76dPBrN3iR_" - }, - "outputs": [], - "source": [ - "%pip install argilla openai datasets llama-index unstructured -qqq" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RI_lnoziIkww" - }, - "outputs": [], - "source": [ - "# Import the needed libraries\n", - "import os\n", - "import random\n", - "from tqdm import tqdm\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import openai\n", - "\n", - "import argilla as rg\n", - "from argilla.feedback import TrainingTask\n", - "from argilla.feedback import ArgillaTrainer\n", - "\n", - "from typing import Union, Tuple, List\n", - "\n", - "from llama_index.core import ServiceContext, VectorStoreIndex, download_loader\n", - "from llama_index.llms.openai import OpenAI\n", - "from llama_index.core.evaluation import DatasetGenerator\n", - "\n", - "from datasets import load_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the URL and API_KEY:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kIrT-iWx3kbX" - }, - "outputs": [], - "source": [ - "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", - "# Replace api_key if you configured a custom API key\n", - "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\",\n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Set the HF_TOKEN 
environment variable\n", - "# import os\n", - "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", - "\n", - "# # Replace api_url with the url to your HF Spaces URL\n", - "# # Replace api_key if you configured a custom API key\n", - "# # Replace workspace with the name of your workspace\n", - "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", - "# api_key=\"owner.apikey\",\n", - "# workspace=\"admin\",\n", - "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "T45WtJ9t9Vzq" - }, - "outputs": [], - "source": [ - "# Your openAI key is needed for generation and fine-tuning\n", - "os.environ['OPENAI_API_KEY'] = 'sk-...'\n", - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Enable Telemetry\n", - "\n", - "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../../reference/telemetry.md) page.\n", - "\n", - "```python\n", - "from argilla.utils.telemetry import tutorial_running\n", - "\n", - "tutorial_running()\n", - "```" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "K4ExAXiKSCu5" - }, - "source": [ - "## Generating responses with LlamaIndex and GPT3.5\n", - "\n", - "We generate responses for the generated questions using [this dataset about Argilla Cloud](https://huggingface.co/datasets/argilla/cloud_assistant_questions). 
We have generated this dataset using a source document and LlamaIndex's question generator (see appendix about how to generate these questions).\n", - "\n", - "If you want to skip this process (it will take several minutes), we have shared the resulting [dataset on Hugging Face](https://huggingface.co/datasets/argilla/customer_assistant).\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "47IBrvmYTKrB" - }, - "outputs": [], - "source": [ - "# Read our source questions\n", - "dataset = load_dataset(\"argilla/cloud_assistant_questions\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PfcOQ67ZSHZK" - }, - "outputs": [], - "source": [ - "# Read and parse the document using Unstructured\n", - "UnstructuredReader = download_loader(\"UnstructuredReader\", refresh_cache=True)\n", - "loader = UnstructuredReader()\n", - "# You can download this doc from: https://huggingface.co/datasets/argilla/cloud_assistant_questions/raw/main/argilla_cloud.txt\n", - "documents = loader.load_data(\"argilla_cloud.txt\")\n", - "\n", - "# Set up the Llama index context\n", - "gpt_35_context = ServiceContext.from_defaults(\n", - " llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.3)\n", - ")\n", - "\n", - "# Index the document and set up the engine\n", - "index = VectorStoreIndex.from_documents(documents, service_context=gpt_35_context)\n", - "query_engine = index.as_query_engine(similarity_top_k=2)\n", - "\n", - "contexts = []\n", - "answers = []\n", - "questions = dataset[\"train\"][\"question\"]\n", - "\n", - "# Inference over the questions\n", - "for question in tqdm(questions):\n", - " response = query_engine.query(question)\n", - " contexts.append([x.node.get_content() for x in response.source_nodes])\n", - " answers.append(str(response))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": 
"UKRsz0WxUBkI", - "outputId": "3288bfed-9ccc-4140-973e-cf7f2e265ba2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Question: What is the ticketing system used by Argilla for customer support?\n", - "Answer: The ticketing system used by Argilla for customer support is not specified in the given context information.\n", - "Context: [\"This process ensures the client administrator has full control over their team's access and can manage their workspace efficiently.Plans The plans for the Argilla Cloud service depend on the volume of records processed, with several tiers available to suit varying needs.Each tier has a corresponding monthly and annual price, with a 10% discount applied to the annual pricing option.The tier selection and associated price will be determined by the client's selection in the Service Order Form section of the Terms of Service document.Plans are: Starter 1 Million records Base 3 Million records Medium 4 Million records Large 6 million records\\n\\nSupport Argilla Cloud offers comprehensive support services to address various issues that may arise during the use of our service.Support levels are categorized into four distinct tiers, based on the severity of the issue, and a separate category for feature requests.The support process, response times, and procedures differ for each category.(1) Critical Issues Critical issues are characterized by: Severe impact on the Service, potentially rendering it completely non-functional.Disruption of critical service operations or functions.Obstruction of entire customer workflows.In the case of a critical issue, Argilla will: Assign specialist(s) to correct the issue on an expedited basis.Provide ongoing communication on the status via email and/or phone, according to the customer's preference.Begin work towards identifying a temporary workaround or fix.(2) Major Issues Major issues involve: Limited functionality of the Service.Service instability with periodic 
interruptions.Material service interruptions in mission-critical functions.Time-sensitive questions impacting performance or deliverables to end-clients.Upon encountering a major issue, Argilla will: Assign a specialist to begin a resolution.Implement additional, escalated procedures as reasonably determined necessary by Argilla Support Services staff.(3) Minor Issues Minor issues include: Errors causing partial, non-critical functionality loss.The need for clarification on procedures or information in documentation.Errors in service that may impact performance deliverables.(4) Trivial Issues Trivial issues are characterized by: Errors in system development with little to no impact on performance.Feature Requests Feature requests involve: Requesting a product enhancement.For feature requests, Argilla will: Respond regarding the relevance and interest in incorporating the requested feature.In summary, Argilla Cloud's support services are designed to provide timely and efficient assistance for issues of varying severity, ensuring a smooth and reliable user experience.All plans include Monday to Friday during office hours (8am to 17pm CEST) with additional support upon request.The Support Channels and features of each tier are shown below:\\n\\nStarter: Slack Community.Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 48 hours.Severity 4 not specified.Base: Ticketing System, Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 24 hours.Severity 4 not specified.Medium: Ticketing System and dedicated Slack channel, Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 24 hours.Severity 4 one week\\n\\nLarge: Ticketing System and dedicated Slack channel, Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 24 hours.Severity 4 one week.Data backup and recovery plan Argilla 
Cloud is committed to ensuring the safety and availability of your data.Our system is designed to run six data backups per day as a standard procedure.These backups capture a snapshot of the system state at the time of the backup, enabling restoration to that point if necessary.Our Recovery Point Objective (RPO) is four hours.This means that in the event of a system failure, the maximum data loss would be up to the last four hours of data input.We achieve this by running regular backups throughout the day, reducing the time window of potential data loss.Our Recovery Time Objective (RTO) is one hour.This is the maximum acceptable length of time that your system could be down following a failure or disruption.It represents our commitment to ensuring that your services are restored as quickly as possible.In the event of a disruption, our team will first evaluate the issue to determine the best course of action.If data recovery is necessary, we will restore from the most recent backup.We will then work to identify and resolve the root cause of the disruption to prevent a recurrence.Finally, we conduct regular test restores to ensure that our backup system is working as intended.These tests verify the integrity of the backup data and the functionality of the restore process.\", \"This documents an overview of the Argilla Cloud service - a comprehensive Software as a Service (SaaS) solution for data labeling and curation.The service is specifically designed to meet the needs of businesses seeking a reliable, secure, and user-friendly platform for data management.The key components of our service include advanced security measures, robust data backup and recovery protocols, flexible pricing options, and dedicated customer support.The onboarding process is efficient, enabling clients to start using the service within one business day.The scope of this proposal includes details on the aforementioned aspects, providing a clear understanding of the service offerings and 
associated processes.Argilla Cloud offers four plans:\\n\\nStarter: Ideal for teams initiating their journey in scaling data curation and labelling projects.Perfect for environments where production monitoring is not a requirement.Base: Tailored for teams seeking to amplify their data curation, labelling efforts, and model monitoring, with enhanced support from Argilla.Medium: Designed for teams expanding their language model pipelines, requiring robust ML lifecycle management fortified by Argilla's comprehensive support.Large: Geared towards teams heavily dependent on language model pipelines, human feedback, and applications, requiring complete ML lifecycle management with robust support.Scope of services Argilla Cloud, a fully managed SaaS, encompasses the following functionalities: Unrestricted Users, Datasets, and Workspaces: The service imposes no limits on the number of users, datasets, or workspaces, supporting scalability of operations.Role-Based Access Control: Administrators and annotators have differentiated access rights to ensure structured and secure data management.Custom Subdomain: Clients are provided with a distinct argilla.io subdomain for accessing the platform.Regular Updates and Upgrades: The service includes regular platform patches and upgrades as part of routine maintenance to uphold system integrity and security.Managed Service: Infrastructure maintenance, backend operations, and other technical aspects are managed by Argilla, eliminating the need for client-side management.Security The security framework of the Argilla Cloud service involves a multi-faceted approach: Data Encryption at Rest: Data stored within the system is encrypted, forming a crucial layer of security.This process automatically encrypts data prior to storage, guarding against unauthorized access.Network Security Measures: The infrastructure has been designed to prevent unauthorized intrusion and to ensure consistent service availability.Measures include firewall 
protections, intrusion detection systems, and scheduled vulnerability scans to detect and address potential threats.Role-Based Access Control: The system implements role-based access control, defining access levels based on user roles.This mechanism controls the extent of access to sensitive information, aligning it with the responsibilities of each role.Security Audits: Regular audits of security systems and protocols are conducted to detect potential vulnerabilities and verify adherence to security standards.Employee Training: All personnel receive regular security training, fostering an understanding of the latest threats and the importance of security best practices.Incident Response Protocol: In the case of a security incident, a pre-defined incident response plan is activated.This plan outlines the procedures for managing different types of security events, and aims to ensure swift mitigation of potential damage.In summary, the security measures in place include data encryption, network security protocols, role-based access control, regular audits, employee training, and a comprehensive incident response plan.These measures contribute to a secure environment for data management.Setup and onboarding The process for setup and onboarding for Argilla Cloud is designed to be efficient and straightforward.The procedure involves a sequence of steps to ensure a smooth transition and optimal use of the service.Step 1: Account Creation The setup process begins with the creation of the client owner account.We require the client to provide the following details: Full name of the administrator Preferred username Administrator's email address Once these details are received, we send an onboarding email to sign up.Step 2: Platform Orientation Once logged in, the administrator has full access to the Argilla Cloud platform.They can familiarize themselves with the platform interface and various features.If required, a guided tour or tutorial can be provided to walk the 
administrator through the platform.Step 3: User Management The administrator is then responsible for setting up additional user accounts.They can invite users via email, manage roles (admin, annotator, etc.), and assign access permissions to different workspaces and datasets.Step 4: Workspace and Dataset Configuration The administrator can create and manage multiple workspaces and datasets.They have the option to configure settings as per their team's requirements, including assigning datasets to specific workspaces and managing access permissions.Step 5: Training and Support Argilla provides open resources and support to aid in the onboarding process.This includes user manuals, tutorials, and access to our support team for any queries or issues that may arise during the setup and onboarding process.By following these steps, new users can be quickly onboarded and begin using the Argilla Cloud service with minimal downtime.\"]\n" - ] - } - ], - "source": [ - "# Show an example of q, a, and context\n", - "print(f\"Question: {questions[0]}\")\n", - "print(f\"Answer: {answers[0]}\")\n", - "print(f\"Context: {contexts[0]}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "y6Bavj4Abkwa" - }, - "source": [ - "## Create Argilla dataset and collect feedback\n", - "\n", - "\n", - "We set up an Argilla Dataset for gathering human feedback.\n", - "\n", - "\n", - "For fine-tuning, we need to set up a text question to gather the human written or edited responses. 
This data is known as completion or demonstration data.\n", - "\n", - "Additionally, leveraging the multi-aspect feedback capabilities of Argilla, we set up two additional feedback dimensions to rate the relevance of the question (as they're synthetic they might be irrelevant or bad quality) and the quality of the context retrieved from our retriever component (can be used to improve the RAG configuration).\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ID3Qu9aVbtaX" - }, - "outputs": [], - "source": [ - "dataset = rg.FeedbackDataset(\n", - " fields=[rg.TextField(name=\"user-message\"), rg.TextField(name=\"context\")],\n", - " questions=[\n", - " rg.RatingQuestion(name=\"question-rating\", title=\"Rate the relevance of the user question\", values=[1,2,3,4,5], required=False),\n", - " rg.RatingQuestion(name=\"context-rating\", title=\"Rate the quality and relevancy of context for the assistant\", values=[1,2,3,4,5], required=False),\n", - " rg.TextQuestion(name=\"response\", title=\"Write a helpful, harmless, accurate response to the user question\"),\n", - " ]\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "btwbDF6_PayX" - }, - "source": [ - "We use the questions, context, and generated responses to build our feedback records. We pre-fill the responses in the UI with OpenAI's responses using `suggestions` and ask our labelers to edit them if necessary." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bW63w95KcfwS" - }, - "outputs": [], - "source": [ - "records = []\n", - "\n", - "for question, answer, context in tqdm(zip(questions, answers, contexts), total=len(questions)):\n", - " # Instantiate the FeedbackRecord\n", - " feedback_record = rg.FeedbackRecord(\n", - " fields={\"user-message\": question, \"context\": \"\\n\".join(context)},\n", - " suggestions=[\n", - " {\n", - " \"question_name\": \"response\",\n", - " \"value\": answer,\n", - " }\n", - " ]\n", - " )\n", - " records.append(feedback_record)\n", - "\n", - "# Publish dataset in Argilla UI\n", - "dataset = dataset.push_to_argilla(name=\"customer_assistant\", workspace=\"admin\")\n", - "dataset.add_records(records)\n", - "\n", - "# Optional: store and version dataset in the Hub\n", - "#dataset = dataset.push_to_huggingface(\"argilla/rg_customer_assistant\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, the dataset is available for collecting feedback with the Argilla UI. Here's a video showing the workflow for labelers:\n", - "\n", - "" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "9yLxmSDsQPts" - }, - "source": [ - "## Prepare Argilla dataset for fine-tuning\n", - "\n", - "\n", - "We now read the responses from Argilla and prepare the dataset for fine-tuning following the [fine-tuning format from OpenAI guides](https://platform.openai.com/docs/guides/fine-tuning).\n", - "\n", - "We use the quick adaptation of LlamaIndex's `TEXT_QA_PROMPT` system prompt and the fine-tuned responses from our Argilla dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Read the dataset from Argilla\n", - "dataset = rg.FeedbackDataset.from_argilla(\"customer_assistant\", workspace=\"admin\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you have skipped the previous steps run this to get the pre-built dataset.\n", - "\n", - "```python\n", - "dataset = rg.FeedbackDataset.from_huggingface(\"argilla/customer_assistant\")\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N5hFLJaHSz9C" - }, - "outputs": [], - "source": [ - "# Adaptation from LlamaIndex's TEXT_QA_PROMPT_TMPL_MSGS[1].content\n", - "user_message_prompt =\"\"\"Context information is below.\n", - "---------------------\n", - "{context_str}\n", - "---------------------\n", - "Given the context information and not prior knowledge but keeping your Argilla Cloud assistant style, answer the query.\n", - "Query: {query_str}\n", - "Answer:\n", - "\"\"\"\n", - "# Adaptation from LlamaIndex's TEXT_QA_SYSTEM_PROMPT\n", - "system_prompt = \"\"\"You are an expert customer service assistant for the Argilla Cloud product that is trusted around the world.\n", - "Always answer the query using the provided context information, and not prior knowledge.\n", - "Some rules to follow:\n", - "1. Never directly reference the given context in your answer.\n", - "2. Avoid statements like 'Based on the context, ...' or 'The context information ...' 
or anything along those lines.\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "siPKLnctRLpD" - }, - "outputs": [], - "source": [ - "def formatting_func(sample: dict) -> Union[Tuple[str, str, str, str], List[Tuple[str, str, str, str]]]:\n", - " from uuid import uuid4\n", - " if sample[\"response\"]:\n", - " chat = str(uuid4())\n", - " user_message = user_message_prompt.format(context_str=sample[\"context\"], query_str=sample[\"user-message\"])\n", - " return [\n", - " (chat, \"0\", \"system\", system_prompt),\n", - " (chat, \"1\", \"user\", user_message),\n", - " (chat, \"2\", \"assistant\", sample[\"response\"][0][\"value\"])\n", - " ]\n", - "\n", - "task = TrainingTask.for_chat_completion(formatting_func=formatting_func)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "SVvdU9EqVWSR" - }, - "source": [ - "## Fine-tune GPT3.5 with high-quality feedback\n", - "\n", - "\n", - "We fine-tune `gpt-3.5-turbo` with the exported dataset using the Argilla Trainer." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "afLyk289VhBL" - }, - "outputs": [], - "source": [ - "trainer = ArgillaTrainer(\n", - " dataset=dataset,\n", - " task=task,\n", - " framework=\"openai\",\n", - ")\n", - "trainer.train(output_dir=\"my-ft-openai-model\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "eG-zXJJ2dbi_" - }, - "source": [ - "## Evaluating base vs fine-tuned with human preference data\n", - "\n", - "We set up a new feedback dataset for gathering human feedback to evaluate the fine-tuned model against the base model, using the test dataset.\n", - "\n", - "There are many ways to collect feedback for this phase. The most suitable in this case is human preference data over responses from the two models: *asking our labelers which response is the most accurate and helpful*. 
We can easily do this with Argilla's `RankingQuestion`.\n", - "\n", - "Additionally, as both responses can be equally bad, we can ask labelers to write down a correct response. In this case, we would be collecting demonstration data to add to our fine-tuning workflow.\n", - "\n", - "\n", - "### Create dataset and collect feedback\n", - "\n", - "We set up and publish a new dataset with a `RankingQuestion` and `TextQuestion`, showing our labelers the `user-message` and two responses (from the base and the fine-tuned models)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tsQT0Ps1igZc" - }, - "outputs": [], - "source": [ - "dataset = rg.FeedbackDataset(\n", - " fields=[rg.TextField(name=\"user-message\"), rg.TextField(name=\"response-a\"), rg.TextField(name=\"response-b\")],\n", - " questions=[\n", - " rg.RankingQuestion(name=\"preference\", title=\"Which response is more helpful, harmless, and accurate.\", values=[\"response-a\", \"response-b\"]),\n", - " rg.TextQuestion(name=\"response\", title=\"If none is good, write a helpful, harmless, accurate response to the user question\", required=False),\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "95-iBx8fgoGg" - }, - "outputs": [], - "source": [ - "# Read our test questions\n", - "questions = load_dataset(\"argilla/cloud_assistant_questions\", split=\"test\")[\"question\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "background_save": true - }, - "id": "NPPLoZGae9ia" - }, - "outputs": [], - "source": [ - "# Generate responses with base model\n", - "index = VectorStoreIndex.from_documents(documents, service_context=gpt_35_context)\n", - "query_engine = index.as_query_engine(similarity_top_k=2)\n", - "\n", - "contexts = []\n", - "base_model_responses = []\n", - "\n", - "for question in tqdm(questions):\n", - " response = query_engine.query(question)\n", - " 
base_model_responses.append(str(response))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AU9NwKnQgAPu" - }, - "outputs": [], - "source": [ - "# Generate responses with ft model: replace with the id of your ft model\n", - "ft_context = ServiceContext.from_defaults(\n", - " llm=OpenAI(model=\"ft:gpt-3.5-turbo-...\", temperature=0.3)\n", - ")\n", - "index = VectorStoreIndex.from_documents(documents, service_context=ft_context)\n", - "query_engine = index.as_query_engine(similarity_top_k=2)\n", - "\n", - "contexts = []\n", - "ft_model_responses = []\n", - "\n", - "for question in tqdm(questions):\n", - " response = query_engine.query(question)\n", - " ft_model_responses.append(str(response))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "hsPoYfgJTCOD" - }, - "source": [ - "An important step here is to randomize the order in which responses are shown.\n", - "\n", - "If we show the fine-tuned model response always as the first option, we can introduce position bias (labelers always choosing a certain position) or make it evident to users that there are two obviously different models.\n", - "\n", - "To avoid this, we randomize the position and keep two metadata fields indicating which model has produced `response-a` and `response-b`. When collecting the responses, we'll use this metadata to map the ranking with each model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0zvH3sDkhMFJ" - }, - "outputs": [], - "source": [ - "records = []\n", - "for base, ft, question in zip(base_model_responses, ft_model_responses, questions):\n", - " # Randomizing the position is a highly important step to mitigate labeler biases\n", - " # Shuffle the order of base and ft\n", - " response_a, response_b = random.sample([base, ft], 2)\n", - "\n", - " # Map the responses back to their model names\n", - " models = {\n", - " base: \"base_model\",\n", - " ft: \"ft_model\"\n", - " }\n", - " feedback_record = rg.FeedbackRecord(\n", - " fields={\"user-message\": question, \"response-a\": response_a, \"response-b\": response_b},\n", - " metadata={\"response-a-model\": models[response_a], \"response-b-model\": models[response_b]}\n", - " )\n", - "\n", - " records.append(feedback_record)\n", - "\n", - "dataset = dataset.push_to_argilla(name=\"finetuned-vs-base-preference\", workspace=\"admin\")\n", - "dataset.add_records(records)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, the dataset is available for collecting feedback with the Argilla UI. Here's a video showing the workflow for labelers:\n", - "\n", - "" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "eRiVf39Bblmh" - }, - "source": [ - "### Retrieve and analyze responses\n", - "\n", - "We can dynamically collect the responses from our labelers. In this case, we will compute the win rate and ties (as users can indicate both responses are equally good or bad).\n", - "\n", - "For the tutorial, we only have one user but Argilla Feedback is fully multi-user, which means you can collect feedback from several users for each data point, increasing the quality of the evaluation. 
\n", - "\n", - "You can read more about multi-user scenarios and built-in unification methods [on this guide](/practical_guides/collect_responses.md).\n", - "\n", - "With a very small evaluation set, we can see that the fine-tuned model responses are preferred ~60% of the time, 3x over the base model, and they are both equally good or bad ~20% of the time. \n", - "\n", - "\n", - "Even with a very small fine-tuning and evaluation dataset, this already shows **promising benefits of fine-tuning models for enhancing RAG systems**.\n", - "\n", - "\n", - "![Fine-tuned model vs base model](/_static/images/llms/fine-tuning-openai-rag-feedback/fine-tune-vs-base.png \"Fine-tuned model vs base model\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XZKBhlKCbwUV" - }, - "outputs": [], - "source": [ - "# Retrieve the dataset from Argilla\n", - "dataset = rg.FeedbackDataset.from_argilla(name=\"finetuned-vs-base-preference\", workspace=\"admin\")\n", - "\n", - "win_rates = {\n", - " 'ft_model': 0,\n", - " 'base_model': 0,\n", - " 'tie': 0\n", - "}\n", - "\n", - "# Compute the win and tie rates\n", - "for record in dataset.records:\n", - " if len(record.responses) > 0:\n", - " for response in record.responses:\n", - " model_a = record.metadata[\"response-a-model\"]\n", - " model_b = record.metadata[\"response-b-model\"]\n", - " preference = response.values['preference'].value\n", - " if preference[0].rank > preference[1].rank:\n", - " win_rates[model_a] = win_rates[model_a] + 1\n", - " elif preference[1].rank > preference[0].rank:\n", - " win_rates[model_b] = win_rates[model_b] + 1\n", - " else:\n", - " win_rates['tie'] = win_rates['tie'] + 1\n", - "win_rates\n", - "# {'ft_model': 17, 'base_model': 6, 'tie': 5}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BpoH-OnRnuoL" - }, - "outputs": [], - "source": [ - "# Let's make the labels more explicit\n", - "data = {'gpt3.5-fine-tuned': 17, 
'gpt3.5-base': 6, 'tie': 5}\n", - "total = sum(data.values())\n", - "\n", - "# Calculate percentages\n", - "percentages = [value / total * 100 for value in data.values()]\n", - "\n", - "# Settings\n", - "colors = ['blue', 'grey', 'black']\n", - "labels = [f\"{key} ({value:.2f}%)\" for key, value in zip(data.keys(), percentages)]\n", - "\n", - "# Plotting\n", - "plt.figure(figsize=(12, 2))\n", - "\n", - "# The cumulative percentage is used to shift the starting point of each subsequent segment\n", - "cumulative_percentages = 0\n", - "\n", - "for percent, color, label in zip(percentages, colors, labels):\n", - " plt.barh('Models', percent, color=color, label=label, left=cumulative_percentages)\n", - " plt.text(cumulative_percentages + percent/2, 0, label, ha='center', va='center', color='white', fontsize=10)\n", - " cumulative_percentages += percent\n", - "\n", - "plt.gca().axes.get_yaxis().set_visible(False)\n", - "plt.xlim(0, 100)\n", - "plt.title('Model Win Rates')\n", - "plt.legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.25), ncol=3)\n", - "plt.tight_layout()\n", - "\n", - "# Display\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "hCrnoTw_GIyr" - }, - "source": [ - "## Appendix: Generating questions with Llama Index\n", - "\n", - "\n", - "We use the `DatasetGenerator` from Llama Index to generate a set of questions using a document about Argilla Cloud." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kf-hZurHckDf" - }, - "outputs": [], - "source": [ - "UnstructuredReader = download_loader(\"UnstructuredReader\", refresh_cache=True)\n", - "loader = UnstructuredReader()\n", - "\n", - "# You can download this doc from: https://huggingface.co/datasets/argilla/cloud_assistant_questions/raw/main/argilla_cloud.txt\n", - "documents = loader.load_data(\"argilla_cloud.txt\")\n", - "\n", - "gpt_35_context = ServiceContext.from_defaults(\n", - " llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.4),\n", - " chunk_size=60\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wSVCTgEsc1zm" - }, - "outputs": [], - "source": [ - "question_gen_query = (\n", - " \"You are customer support and sales expert of Argilla. Your task is to setup \"\n", - " \"a set of frequently asked questions about the Argilla Cloud service, offer and plans\"\n", - " \"formulate a single question that could be asked by a potential B2B client interested in Argilla Cloud \"\n", - " \". Restrict the question to the context information provided and don't ask general questions not related to the service and the context provided.\"\n", - ")\n", - "\n", - "dataset_generator = DatasetGenerator.from_documents(\n", - " documents,\n", - " question_gen_query=question_gen_query,\n", - " service_context=gpt_35_context,\n", - " num_questions_per_chunk=100\n", - ")\n", - "\n", - "questions = dataset_generator.generate_questions_from_nodes(num=300)" - ] - } - ], - "metadata": { + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "FOyRtBFaD8Ti" + }, + "source": [ + "# 🪄 Fine-tuning and evaluating GPT-3.5 with human feedback for RAG \n", + "\n", + "This guide explains how to fine-tune OpenAI's GPT3.5-turbo with your own data and Argilla to improve a RAG (Retrieval Augmented Generation) system. 
\n", + "\n", + "It includes the following steps:\n", + "\n", + "- Setting up a RAG pipeline using [LlamaIndex](https://github.com/jerryjliu/llama_index) and [Unstructured](https://github.com/Unstructured-IO/unstructured) to answer questions using a document about Argilla Cloud.\n", + "- Generating potential questions with LlamaIndex to build a training and test set.\n", + "- Building a dataset for collecting human written responses with Argilla.\n", + "- Fine-tuning GPT3.5-turbo with high-quality data.\n", + "- Evaluating the fine-tuned model vs. the base model with human preference data from Argilla.\n", + "\n", + "The goal of the tutorial is to demonstrate how to incorporate human feedback into your LLM development for two critical stages: \n", + "\n", + "1. Gathering **high-quality data for fine-tuning**, \n", + "2. Gathering **human feedback for evaluation of LLM applications**.\n", + "\n", + "\n", + "Given the ongoing debate between Retrieval Augmented Generation (RAG) and fine-tuning, we selected a real-world RAG use case to demonstrate how fine-tuning enhances the style, utility, and relevance of responses within a RAG application. The resulting system will be a Hybrid RAG system (RAG using fine-tuned models) as [described in this article](https://towardsdatascience.com/rag-vs-finetuning-which-is-the-best-tool-to-boost-your-llm-application-94654b1eaba7). \n", + "\n", + "The screenshot below displays the evaluation dataset, termed the \"human preference dataset.\" In it, `response-a` is produced by the fine-tuned model, while `response-b` comes from the base GPT-3.5 model. 
With just minor fine-tuning and without altering the system message, we've directed the LLM's behavior towards generating responses that are more helpful, faithful, friendly, and aligned with our brand.\n", + "\n", + "Fine-tuning effectively mitigates common RAG challenges, like the LLM referring to the context using phrases such as \"The context does not provide information about this.\" This enhancement is notable even when we had incorporated directives in the system message to deter such references, like \"2. Avoid phrases such as 'Based on the context, ...' or 'The context information ...'.\" (see Llama Index default prompt later).\n", + "\n", + "You can also browse the [datasets hosted with Argilla Hugging Face Spaces](https://huggingface.co/spaces/argilla/fine-tune-chat-gpt). User and password: argilla / 12345678. The dataset for this stage is `customer-assistant` and for the evaluation step is `finetuned-vs-base-preference`.\n", + "\n", + "\n", + "![Human preference](/_static/images/llms/fine-tuning-openai-rag-feedback/preference-fine-tuned.png \"Fine-tuned model vs base model response\")\n", + "\n", + "\n", + "\n", + "\n", + "By the end of the tutorial, you'll be using a fine-tuned model for RAG and have a human evaluation workflow in place to continuously evaluate your LLM application (see below for a comparison of the base gpt3.5 vs. the fine-tuned gpt3.5 for this application).\n", + "\n", + "![Fine-tuned model vs. base model](/_static/images/llms/fine-tuning-openai-rag-feedback/fine-tune-vs-base.png \"Fine-tuned model vs base model\")\n", + "\n", + "\n", + "Let's get started!\n", + "\n", + "## Setup\n", + "\n", + "To run this tutorial, you need to [install and launch Argilla](https://docs.v1.argilla.io/en/latest/getting_started/quickstart_installation.html), as well as some other packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j76dPBrN3iR_" + }, + "outputs": [], + "source": [ + "%pip install argilla openai datasets llama-index unstructured -qqq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RI_lnoziIkww" + }, + "outputs": [], + "source": [ + "# Import the needed libraries\n", + "import os\n", + "import random\n", + "from tqdm import tqdm\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import openai\n", + "\n", + "import argilla as rg\n", + "from argilla.feedback import TrainingTask\n", + "from argilla.feedback import ArgillaTrainer\n", + "\n", + "from typing import Union, Tuple, List\n", + "\n", + "from llama_index.core import ServiceContext, VectorStoreIndex, download_loader\n", + "from llama_index.llms.openai import OpenAI\n", + "from llama_index.core.evaluation import DatasetGenerator\n", + "\n", + "from datasets import load_dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the URL and API_KEY:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kIrT-iWx3kbX" + }, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# Replace api_key if you configured a custom API key\n", + "# Replace workspace with the name of your workspace\n", + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# 
import os\n", + "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# # Replace workspace with the name of your workspace\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"owner.apikey\",\n", + "# workspace=\"admin\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T45WtJ9t9Vzq" + }, + "outputs": [], + "source": [ + "# Your openAI key is needed for generation and fine-tuning\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", + "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../../reference/telemetry.md) page.\n", + "\n", + "```python\n", + "from argilla.utils.telemetry import tutorial_running\n", + "\n", + "tutorial_running()\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "K4ExAXiKSCu5" + }, + "source": [ + "## Generating responses with LlamaIndex and GPT3.5\n", + "\n", + "We generate responses for the generated questions using [this dataset about Argilla Cloud](https://huggingface.co/datasets/argilla/cloud_assistant_questions). 
We have generated this dataset using a source document and LlamaIndex's question generator (see appendix about how to generate these questions).\n", + "\n", + "If you want to skip this process (it will take several minutes), we have shared the resulting [dataset on Hugging Face](https://huggingface.co/datasets/argilla/customer_assistant).\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "47IBrvmYTKrB" + }, + "outputs": [], + "source": [ + "# Read our source questions\n", + "dataset = load_dataset(\"argilla/cloud_assistant_questions\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PfcOQ67ZSHZK" + }, + "outputs": [], + "source": [ + "# Read and parse the document using Unstructured\n", + "UnstructuredReader = download_loader(\"UnstructuredReader\", refresh_cache=True)\n", + "loader = UnstructuredReader()\n", + "# You can download this doc from: https://huggingface.co/datasets/argilla/cloud_assistant_questions/raw/main/argilla_cloud.txt\n", + "documents = loader.load_data(\"argilla_cloud.txt\")\n", + "\n", + "# Set up the Llama index context\n", + "gpt_35_context = ServiceContext.from_defaults(\n", + " llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.3)\n", + ")\n", + "\n", + "# Index the document and set up the engine\n", + "index = VectorStoreIndex.from_documents(documents, service_context=gpt_35_context)\n", + "query_engine = index.as_query_engine(similarity_top_k=2)\n", + "\n", + "contexts = []\n", + "answers = []\n", + "questions = dataset[\"train\"][\"question\"]\n", + "\n", + "# Inference over the questions\n", + "for question in tqdm(questions):\n", + " response = query_engine.query(question)\n", + " contexts.append([x.node.get_content() for x in response.source_nodes])\n", + " answers.append(str(response))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - 
"display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" + "base_uri": "https://localhost:8080/" + }, + "id": "UKRsz0WxUBkI", + "outputId": "3288bfed-9ccc-4140-973e-cf7f2e265ba2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: What is the ticketing system used by Argilla for customer support?\n", + "Answer: The ticketing system used by Argilla for customer support is not specified in the given context information.\n", + "Context: [\"This process ensures the client administrator has full control over their team's access and can manage their workspace efficiently.Plans The plans for the Argilla Cloud service depend on the volume of records processed, with several tiers available to suit varying needs.Each tier has a corresponding monthly and annual price, with a 10% discount applied to the annual pricing option.The tier selection and associated price will be determined by the client's selection in the Service Order Form section of the Terms of Service document.Plans are: Starter 1 Million records Base 3 Million records Medium 4 Million records Large 6 million records\\n\\nSupport Argilla Cloud offers comprehensive support services to address various issues that may arise during the use of our service.Support levels are categorized into four distinct tiers, based on the severity of the issue, and a separate category for feature requests.The support process, response times, and procedures differ for each category.(1) Critical Issues Critical issues are characterized by: Severe impact on the Service, potentially rendering it completely non-functional.Disruption of critical service operations or functions.Obstruction of entire customer workflows.In the case of a critical issue, 
Argilla will: Assign specialist(s) to correct the issue on an expedited basis.Provide ongoing communication on the status via email and/or phone, according to the customer's preference.Begin work towards identifying a temporary workaround or fix.(2) Major Issues Major issues involve: Limited functionality of the Service.Service instability with periodic interruptions.Material service interruptions in mission-critical functions.Time-sensitive questions impacting performance or deliverables to end-clients.Upon encountering a major issue, Argilla will: Assign a specialist to begin a resolution.Implement additional, escalated procedures as reasonably determined necessary by Argilla Support Services staff.(3) Minor Issues Minor issues include: Errors causing partial, non-critical functionality loss.The need for clarification on procedures or information in documentation.Errors in service that may impact performance deliverables.(4) Trivial Issues Trivial issues are characterized by: Errors in system development with little to no impact on performance.Feature Requests Feature requests involve: Requesting a product enhancement.For feature requests, Argilla will: Respond regarding the relevance and interest in incorporating the requested feature.In summary, Argilla Cloud's support services are designed to provide timely and efficient assistance for issues of varying severity, ensuring a smooth and reliable user experience.All plans include Monday to Friday during office hours (8am to 17pm CEST) with additional support upon request.The Support Channels and features of each tier are shown below:\\n\\nStarter: Slack Community.Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 48 hours.Severity 4 not specified.Base: Ticketing System, Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 24 hours.Severity 4 not specified.Medium: Ticketing System and dedicated Slack channel, 
Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 24 hours.Severity 4 one week\\n\\nLarge: Ticketing System and dedicated Slack channel, Severity 1 - Response time < 4 hours.Severity 2 - Response time < 8 hours.Severity 3 - Response time < 24 hours.Severity 4 one week.Data backup and recovery plan Argilla Cloud is committed to ensuring the safety and availability of your data.Our system is designed to run six data backups per day as a standard procedure.These backups capture a snapshot of the system state at the time of the backup, enabling restoration to that point if necessary.Our Recovery Point Objective (RPO) is four hours.This means that in the event of a system failure, the maximum data loss would be up to the last four hours of data input.We achieve this by running regular backups throughout the day, reducing the time window of potential data loss.Our Recovery Time Objective (RTO) is one hour.This is the maximum acceptable length of time that your system could be down following a failure or disruption.It represents our commitment to ensuring that your services are restored as quickly as possible.In the event of a disruption, our team will first evaluate the issue to determine the best course of action.If data recovery is necessary, we will restore from the most recent backup.We will then work to identify and resolve the root cause of the disruption to prevent a recurrence.Finally, we conduct regular test restores to ensure that our backup system is working as intended.These tests verify the integrity of the backup data and the functionality of the restore process.\", \"This documents an overview of the Argilla Cloud service - a comprehensive Software as a Service (SaaS) solution for data labeling and curation.The service is specifically designed to meet the needs of businesses seeking a reliable, secure, and user-friendly platform for data management.The key components of our service include advanced 
security measures, robust data backup and recovery protocols, flexible pricing options, and dedicated customer support.The onboarding process is efficient, enabling clients to start using the service within one business day.The scope of this proposal includes details on the aforementioned aspects, providing a clear understanding of the service offerings and associated processes.Argilla Cloud offers four plans:\\n\\nStarter: Ideal for teams initiating their journey in scaling data curation and labelling projects.Perfect for environments where production monitoring is not a requirement.Base: Tailored for teams seeking to amplify their data curation, labelling efforts, and model monitoring, with enhanced support from Argilla.Medium: Designed for teams expanding their language model pipelines, requiring robust ML lifecycle management fortified by Argilla's comprehensive support.Large: Geared towards teams heavily dependent on language model pipelines, human feedback, and applications, requiring complete ML lifecycle management with robust support.Scope of services Argilla Cloud, a fully managed SaaS, encompasses the following functionalities: Unrestricted Users, Datasets, and Workspaces: The service imposes no limits on the number of users, datasets, or workspaces, supporting scalability of operations.Role-Based Access Control: Administrators and annotators have differentiated access rights to ensure structured and secure data management.Custom Subdomain: Clients are provided with a distinct argilla.io subdomain for accessing the platform.Regular Updates and Upgrades: The service includes regular platform patches and upgrades as part of routine maintenance to uphold system integrity and security.Managed Service: Infrastructure maintenance, backend operations, and other technical aspects are managed by Argilla, eliminating the need for client-side management.Security The security framework of the Argilla Cloud service involves a multi-faceted approach: Data Encryption 
at Rest: Data stored within the system is encrypted, forming a crucial layer of security.This process automatically encrypts data prior to storage, guarding against unauthorized access.Network Security Measures: The infrastructure has been designed to prevent unauthorized intrusion and to ensure consistent service availability.Measures include firewall protections, intrusion detection systems, and scheduled vulnerability scans to detect and address potential threats.Role-Based Access Control: The system implements role-based access control, defining access levels based on user roles.This mechanism controls the extent of access to sensitive information, aligning it with the responsibilities of each role.Security Audits: Regular audits of security systems and protocols are conducted to detect potential vulnerabilities and verify adherence to security standards.Employee Training: All personnel receive regular security training, fostering an understanding of the latest threats and the importance of security best practices.Incident Response Protocol: In the case of a security incident, a pre-defined incident response plan is activated.This plan outlines the procedures for managing different types of security events, and aims to ensure swift mitigation of potential damage.In summary, the security measures in place include data encryption, network security protocols, role-based access control, regular audits, employee training, and a comprehensive incident response plan.These measures contribute to a secure environment for data management.Setup and onboarding The process for setup and onboarding for Argilla Cloud is designed to be efficient and straightforward.The procedure involves a sequence of steps to ensure a smooth transition and optimal use of the service.Step 1: Account Creation The setup process begins with the creation of the client owner account.We require the client to provide the following details: Full name of the administrator Preferred username 
Administrator's email address Once these details are received, we send an onboarding email to sign up.Step 2: Platform Orientation Once logged in, the administrator has full access to the Argilla Cloud platform.They can familiarize themselves with the platform interface and various features.If required, a guided tour or tutorial can be provided to walk the administrator through the platform.Step 3: User Management The administrator is then responsible for setting up additional user accounts.They can invite users via email, manage roles (admin, annotator, etc.), and assign access permissions to different workspaces and datasets.Step 4: Workspace and Dataset Configuration The administrator can create and manage multiple workspaces and datasets.They have the option to configure settings as per their team's requirements, including assigning datasets to specific workspaces and managing access permissions.Step 5: Training and Support Argilla provides open resources and support to aid in the onboarding process.This includes user manuals, tutorials, and access to our support team for any queries or issues that may arise during the setup and onboarding process.By following these steps, new users can be quickly onboarded and begin using the Argilla Cloud service with minimal downtime.\"]\n" + ] } + ], + "source": [ + "# Show an example of q, a, and context\n", + "print(f\"Question: {questions[0]}\")\n", + "print(f\"Answer: {answers[0]}\")\n", + "print(f\"Context: {contexts[0]}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "y6Bavj4Abkwa" + }, + "source": [ + "## Create Argilla dataset and collect feedback\n", + "\n", + "\n", + "We set up an Argilla Dataset for gathering human feedback.\n", + "\n", + "\n", + "For fine-tuning, we need to set up a text question to gather the human written or edited responses. 
This data is known as completion or demonstration data.\n", + "\n", + "Additionally, leveraging the multi-aspect feedback capabilities of Argilla, we set up two additional feedback dimensions to rate the relevance of the question (as they're synthetic they might be irrelevant or bad quality) and the quality of the context retrieved from our retriever component (can be used to improve the RAG configuration).\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ID3Qu9aVbtaX" + }, + "outputs": [], + "source": [ + "dataset = rg.FeedbackDataset(\n", + " fields=[rg.TextField(name=\"user-message\"), rg.TextField(name=\"context\")],\n", + " questions=[\n", + " rg.RatingQuestion(\n", + " name=\"question-rating\",\n", + " title=\"Rate the relevance of the user question\",\n", + " values=[1, 2, 3, 4, 5],\n", + " required=False,\n", + " ),\n", + " rg.RatingQuestion(\n", + " name=\"context-rating\",\n", + " title=\"Rate the quality and relevancy of context for the assistant\",\n", + " values=[1, 2, 3, 4, 5],\n", + " required=False,\n", + " ),\n", + " rg.TextQuestion(\n", + " name=\"response\",\n", + " title=\"Write a helpful, harmless, accurate response to the user question\",\n", + " ),\n", + " ],\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "btwbDF6_PayX" + }, + "source": [ + "We use the questions, context, and generated responses to build our feedback records. We pre-fill the responses in the UI with OpenAI's responses using `suggestions` and ask our labelers to edit them if necessary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bW63w95KcfwS" + }, + "outputs": [], + "source": [ + "records = []\n", + "\n", + "for question, answer, context in tqdm(\n", + " zip(questions, answers, contexts), total=len(questions)\n", + "):\n", + " # Instantiate the FeedbackRecord\n", + " feedback_record = rg.FeedbackRecord(\n", + " fields={\"user-message\": question, \"context\": \"\\n\".join(context)},\n", + " suggestions=[\n", + " {\n", + " \"question_name\": \"response\",\n", + " \"value\": answer,\n", + " }\n", + " ],\n", + " )\n", + " records.append(feedback_record)\n", + "\n", + "# Publish dataset in Argilla UI\n", + "dataset = dataset.push_to_argilla(name=\"customer_assistant\", workspace=\"admin\")\n", + "dataset.add_records(records)\n", + "\n", + "# Optional: store and version dataset in the Hub\n", + "# dataset = dataset.push_to_huggingface(\"argilla/rg_customer_assistant\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, the dataset is available for collecting feedback with the Argilla UI. Here's a video showing the workflow for labelers:\n", + "\n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "9yLxmSDsQPts" + }, + "source": [ + "## Prepare Argilla dataset for fine-tuning\n", + "\n", + "\n", + "We now read the responses from Argilla and prepare the dataset for fine-tuning following the [fine-tuning format from OpenAI guides](https://platform.openai.com/docs/guides/fine-tuning).\n", + "\n", + "We use the quick adaptation of LlamaIndex's `TEXT_QA_PROMPT` system prompt and the fine-tuned responses from our Argilla dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read the dataset from Argilla\n", + "dataset = rg.FeedbackDataset.from_argilla(\"customer_assistant\", workspace=\"admin\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have skipped the previous steps run this to get the pre-built dataset.\n", + "\n", + "```python\n", + "dataset = rg.FeedbackDataset.from_huggingface(\"argilla/customer_assistant\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N5hFLJaHSz9C" + }, + "outputs": [], + "source": [ + "# Adaptation from LlamaIndex's TEXT_QA_PROMPT_TMPL_MSGS[1].content\n", + "user_message_prompt = \"\"\"Context information is below.\n", + "---------------------\n", + "{context_str}\n", + "---------------------\n", + "Given the context information and not prior knowledge but keeping your Argilla Cloud assistant style, answer the query.\n", + "Query: {query_str}\n", + "Answer:\n", + "\"\"\"\n", + "# Adaptation from LlamaIndex's TEXT_QA_SYSTEM_PROMPT\n", + "system_prompt = \"\"\"You are an expert customer service assistant for the Argilla Cloud product that is trusted around the world.\n", + "Always answer the query using the provided context information, and not prior knowledge.\n", + "Some rules to follow:\n", + "1. Never directly reference the given context in your answer.\n", + "2. Avoid statements like 'Based on the context, ...' or 'The context information ...' 
or anything along those lines.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "siPKLnctRLpD" + }, + "outputs": [], + "source": [ + "def formatting_func(\n", + " sample: dict,\n", + ") -> Union[Tuple[str, str, str, str], List[Tuple[str, str, str, str]]]:\n", + " from uuid import uuid4\n", + "\n", + " if sample[\"response\"]:\n", + " chat = str(uuid4())\n", + " user_message = user_message_prompt.format(\n", + " context_str=sample[\"context\"], query_str=sample[\"user-message\"]\n", + " )\n", + " return [\n", + " (chat, \"0\", \"system\", system_prompt),\n", + " (chat, \"1\", \"user\", user_message),\n", + " (chat, \"2\", \"assistant\", sample[\"response\"][0][\"value\"]),\n", + " ]\n", + "\n", + "\n", + "task = TrainingTask.for_chat_completion(formatting_func=formatting_func)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "SVvdU9EqVWSR" + }, + "source": [ + "## Fine-tune GPT3.5 with high-quality feedback\n", + "\n", + "\n", + "We fine-tune `gpt-3.5-turbo` with the exported dataset using the Argilla Trainer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "afLyk289VhBL" + }, + "outputs": [], + "source": [ + "trainer = ArgillaTrainer(\n", + " dataset=dataset,\n", + " task=task,\n", + " framework=\"openai\",\n", + ")\n", + "trainer.train(output_dir=\"my-ft-openai-model\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "eG-zXJJ2dbi_" + }, + "source": [ + "## Evaluating base vs fine-tuned with human preference data\n", + "\n", + "We set up a new feedback dataset for gathering human feedback to evaluate the fine-tuned model against the base model, using the test dataset.\n", + "\n", + "There are many ways to collect feedback for this phase. The most suitable in this case is human preference data over responses from the two models: *asking our labelers which response is the most accurate and helpful*. 
We can easily do this with Argilla's `RankingQuestion`.\n", + "\n", + "Additionally, as both responses can be equally bad, we can ask labelers to write down a correct response. In this case, we would be collecting demonstration data to add to our fine-tuning workflow.\n", + "\n", + "\n", + "### Create dataset and collect feedback\n", + "\n", + "We set up and publish a new dataset with a `RankingQuestion` and `TextQuestion`, showing our labelers the `user-message` and two responses (from the base and the fine-tuned models)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsQT0Ps1igZc" + }, + "outputs": [], + "source": [ + "dataset = rg.FeedbackDataset(\n", + " fields=[\n", + " rg.TextField(name=\"user-message\"),\n", + " rg.TextField(name=\"response-a\"),\n", + " rg.TextField(name=\"response-b\"),\n", + " ],\n", + " questions=[\n", + " rg.RankingQuestion(\n", + " name=\"preference\",\n", + " title=\"Which response is more helpful, harmless, and accurate.\",\n", + " values=[\"response-a\", \"response-b\"],\n", + " ),\n", + " rg.TextQuestion(\n", + " name=\"response\",\n", + " title=\"If none is good, write a helpful, harmless, accurate response to the user question\",\n", + " required=False,\n", + " ),\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "95-iBx8fgoGg" + }, + "outputs": [], + "source": [ + "# Read our test questions\n", + "questions = load_dataset(\"argilla/cloud_assistant_questions\", split=\"test\")[\"question\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "background_save": true + }, + "id": "NPPLoZGae9ia" + }, + "outputs": [], + "source": [ + "# Generate responses with base model\n", + "index = VectorStoreIndex.from_documents(documents, service_context=gpt_35_context)\n", + "query_engine = index.as_query_engine(similarity_top_k=2)\n", + "\n", + "contexts = []\n", + "base_model_responses = []\n", + 
"\n", + "for question in tqdm(questions):\n", + " response = query_engine.query(question)\n", + " base_model_responses.append(str(response))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AU9NwKnQgAPu" + }, + "outputs": [], + "source": [ + "# Generate responses with ft model: replace with the id of your ft model\n", + "ft_context = ServiceContext.from_defaults(\n", + " llm=OpenAI(model=\"ft:gpt-3.5-turbo-...\", temperature=0.3)\n", + ")\n", + "index = VectorStoreIndex.from_documents(documents, service_context=ft_context)\n", + "query_engine = index.as_query_engine(similarity_top_k=2)\n", + "\n", + "contexts = []\n", + "ft_model_responses = []\n", + "\n", + "for question in tqdm(questions):\n", + " response = query_engine.query(question)\n", + " ft_model_responses.append(str(response))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "hsPoYfgJTCOD" + }, + "source": [ + "An important step here is to randomize the order in which responses are shown.\n", + "\n", + "If we show the fine-tuned model response always as the first option, we can introduce position bias (labelers always choosing a certain position) or make it evident to users that there are two obviously different models.\n", + "\n", + "To avoid this, we randomize the position and keep two metadata fields indicating which model has produced `response-a` and `response-b`. When collecting the responses, we'll use this metadata to map the ranking with each model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0zvH3sDkhMFJ" + }, + "outputs": [], + "source": [ + "records = []\n", + "for base, ft, question in zip(base_model_responses, ft_model_responses, questions):\n", + " # Randomizing the position is a highly important step to mitigate labeler biases\n", + " # Shuffle the order of base and ft\n", + " response_a, response_b = random.sample([base, ft], 2)\n", + "\n", + " # Map the responses back to their model names\n", + " models = {base: \"base_model\", ft: \"ft_model\"}\n", + " feedback_record = rg.FeedbackRecord(\n", + " fields={\n", + " \"user-message\": question,\n", + " \"response-a\": response_a,\n", + " \"response-b\": response_b,\n", + " },\n", + " metadata={\n", + " \"response-a-model\": models[response_a],\n", + " \"response-b-model\": models[response_b],\n", + " },\n", + " )\n", + "\n", + " records.append(feedback_record)\n", + "\n", + "dataset = dataset.push_to_argilla(\n", + " name=\"finetuned-vs-base-preference\", workspace=\"admin\"\n", + ")\n", + "dataset.add_records(records)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, the dataset is available for collecting feedback with the Argilla UI. Here's a video showing the workflow for labelers:\n", + "\n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "eRiVf39Bblmh" + }, + "source": [ + "### Retrieve and analyze responses\n", + "\n", + "We can dynamically collect the responses from our labelers. In this case, we will compute the win rate and ties (as users can indicate both responses are equally good or bad).\n", + "\n", + "For the tutorial, we only have one user but Argilla Feedback is fully multi-user, which means you can collect feedback from several users for each data point, increasing the quality of the evaluation. 
\n", + "\n", + "You can read more about multi-user scenarios and built-in unification methods [on this guide](/practical_guides/collect_responses.md).\n", + "\n", + "With a very small evaluation set, we can see that the fine-tuned model responses are preferred ~60% of the time, 3x over the base model, and they are both equally good or bad ~20% of the time. \n", + "\n", + "\n", + "Even with a very small fine-tuning and evaluation dataset, this already shows **promising benefits of fine-tuning models for enhancing RAG systems**.\n", + "\n", + "\n", + "![Fine-tuned model vs base model](/_static/images/llms/fine-tuning-openai-rag-feedback/fine-tune-vs-base.png \"Fine-tuned model vs base model\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XZKBhlKCbwUV" + }, + "outputs": [], + "source": [ + "# Retrieve the dataset from Argilla\n", + "dataset = rg.FeedbackDataset.from_argilla(\n", + " name=\"finetuned-vs-base-preference\", workspace=\"admin\"\n", + ")\n", + "\n", + "win_rates = {\"ft_model\": 0, \"base_model\": 0, \"tie\": 0}\n", + "\n", + "# Compute the win and tie rates\n", + "for record in dataset.records:\n", + " if len(record.responses) > 0:\n", + " for response in record.responses:\n", + " model_a = record.metadata[\"response-a-model\"]\n", + " model_b = record.metadata[\"response-b-model\"]\n", + " preference = response.values[\"preference\"].value\n", + " if preference[0].rank > preference[1].rank:\n", + " win_rates[model_a] = win_rates[model_a] + 1\n", + " elif preference[1].rank > preference[0].rank:\n", + " win_rates[model_b] = win_rates[model_b] + 1\n", + " else:\n", + " win_rates[\"tie\"] = win_rates[\"tie\"] + 1\n", + "win_rates\n", + "# {'ft_model': 17, 'base_model': 6, 'tie': 5}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BpoH-OnRnuoL" + }, + "outputs": [], + "source": [ + "# Let's make the labels more explicit\n", + "data = {\"gpt3.5-fine-tuned\": 17, 
\"gpt3.5-base\": 6, \"tie\": 5}\n", + "total = sum(data.values())\n", + "\n", + "# Calculate percentages\n", + "percentages = [value / total * 100 for value in data.values()]\n", + "\n", + "# Settings\n", + "colors = [\"blue\", \"grey\", \"black\"]\n", + "labels = [f\"{key} ({value:.2f}%)\" for key, value in zip(data.keys(), percentages)]\n", + "\n", + "# Plotting\n", + "plt.figure(figsize=(12, 2))\n", + "\n", + "# The cumulative percentage is used to shift the starting point of each subsequent segment\n", + "cumulative_percentages = 0\n", + "\n", + "for percent, color, label in zip(percentages, colors, labels):\n", + " plt.barh(\"Models\", percent, color=color, label=label, left=cumulative_percentages)\n", + " plt.text(\n", + " cumulative_percentages + percent / 2,\n", + " 0,\n", + " label,\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " color=\"white\",\n", + " fontsize=10,\n", + " )\n", + " cumulative_percentages += percent\n", + "\n", + "plt.gca().axes.get_yaxis().set_visible(False)\n", + "plt.xlim(0, 100)\n", + "plt.title(\"Model Win Rates\")\n", + "plt.legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.25), ncol=3)\n", + "plt.tight_layout()\n", + "\n", + "# Display\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "hCrnoTw_GIyr" + }, + "source": [ + "## Appendix: Generating questions with Llama Index\n", + "\n", + "\n", + "We use the `DatasetGenerator` from Llama Index to generate a set of questions using a document about Argilla Cloud." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kf-hZurHckDf" + }, + "outputs": [], + "source": [ + "UnstructuredReader = download_loader(\"UnstructuredReader\", refresh_cache=True)\n", + "loader = UnstructuredReader()\n", + "\n", + "# You can download this doc from: https://huggingface.co/datasets/argilla/cloud_assistant_questions/raw/main/argilla_cloud.txt\n", + "documents = loader.load_data(\"argilla_cloud.txt\")\n", + "\n", + "gpt_35_context = ServiceContext.from_defaults(\n", + " llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.4), chunk_size=60\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wSVCTgEsc1zm" + }, + "outputs": [], + "source": [ + "question_gen_query = (\n", + " \"You are customer support and sales expert of Argilla. Your task is to setup \"\n", + " \"a set of frequently asked questions about the Argilla Cloud service, offer and plans\"\n", + " \"formulate a single question that could be asked by a potential B2B client interested in Argilla Cloud \"\n", + " \". 
Restrict the question to the context information provided and don't ask general questions not related to the service and the context provided.\"\n", + ")\n", + "\n", + "dataset_generator = DatasetGenerator.from_documents(\n", + " documents,\n", + " question_gen_query=question_gen_query,\n", + " service_context=gpt_35_context,\n", + " num_questions_per_chunk=100,\n", + ")\n", + "\n", + "questions = dataset_generator.generate_questions_from_nodes(num=300)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-sentencesimilarity-rag.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-sentencesimilarity-rag.ipynb index 54b7156b49..2461d17cfb 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-sentencesimilarity-rag.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-sentencesimilarity-rag.ipynb @@ -117,7 +117,15 @@ "from tqdm import tqdm\n", "\n", "from haystack.document_stores import FAISSDocumentStore\n", - "from haystack.nodes import PreProcessor, TextConverter, EmbeddingRetriever, PromptNode, PromptTemplate, AnswerParser, SentenceTransformersRanker\n", + "from haystack.nodes import (\n", + " PreProcessor,\n", + " TextConverter,\n", + " EmbeddingRetriever,\n", + " PromptNode,\n", + " PromptTemplate,\n", + " AnswerParser,\n", + " SentenceTransformersRanker,\n", + ")\n", "from haystack.pipelines import Pipeline\n", "from haystack.pipelines.standard_pipelines import 
TextIndexingPipeline" ] @@ -138,11 +146,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -165,7 +169,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -188,9 +192,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -249,7 +256,9 @@ "outputs": [], "source": [ "# Initialize the DocumentStore\n", - "document_store = FAISSDocumentStore(faiss_index_factory_str=\"Flat\", similarity=\"dot_product\", embedding_dim=384)\n", + "document_store = FAISSDocumentStore(\n", + " faiss_index_factory_str=\"Flat\", similarity=\"dot_product\", embedding_dim=384\n", + ")\n", "\n", "# Initialize the PreProcessor\n", "preprocessor = PreProcessor(\n", @@ -297,7 +306,8 @@ "source": [ "# Initialize the EmbeddingRetriever\n", "retriever = EmbeddingRetriever(\n", - " document_store=document_store, embedding_model=\"sentence-transformers/multi-qa-mpnet-base-dot-v1\"\n", + " document_store=document_store,\n", + " embedding_model=\"sentence-transformers/multi-qa-mpnet-base-dot-v1\",\n", ")\n", "document_store.update_embeddings(retriever)\n", "\n", @@ -307,11 +317,13 @@ " Provide a clear and concise response.\n", " Your answer should be in your own words and be no longer than 50 words.\n", " \\n\\n Related text: {join(documents, delimiter=new_line, pattern=new_line+'Document[$idx]: $content', str_replace={new_line: ' ', '[': '(', ']': ')'})} \\n Question: {query}; Answer: \"\"\",\n", - " output_parser=AnswerParser(reference_pattern=r\"Document\\[(\\d+)\\]\"),\n", + " output_parser=AnswerParser(reference_pattern=r\"Document\\[(\\d+)\\]\"),\n", ")\n", "\n", "# Initialize PromptNode\n", - "prompt_node = PromptNode(model_name_or_path=\"google/flan-t5-large\", default_prompt_template=rag_prompt)" + "prompt_node = PromptNode(\n", + " model_name_or_path=\"google/flan-t5-large\", default_prompt_template=rag_prompt\n", + ")" ] }, { @@ -336,17 +348,16 @@ "questions = dataset[\"train\"][\"question\"]\n", "answers = []\n", "contexts = []\n", - " \n", + "\n", "for question in tqdm(questions):\n", - " \n", " # Get the response and save it\n", " response = pipe.run(query=question)\n", " answers.append(response[\"answers\"][0].answer)\n", - " \n", + "\n", " # Get the document 
contexts and save them\n", - " prompt = response[\"answers\"][0].meta['prompt']\n", - " segments = re.split(r'Document\\[\\d+\\]:', prompt)\n", - " document_segments = [segment.strip() for segment in segments[1:]] \n", + " prompt = response[\"answers\"][0].meta[\"prompt\"]\n", + " segments = re.split(r\"Document\\[\\d+\\]:\", prompt)\n", + " document_segments = [segment.strip() for segment in segments[1:]]\n", " contexts.append(document_segments)" ] }, @@ -464,8 +475,13 @@ "# Create the proper records\n", "records = [\n", " rg.FeedbackRecord(\n", - " fields={\"query\": question, \"retrieved_document_1\": context[0], \"retrieved_document_2\": context[1], \"retrieved_document_3\": context[2]},\n", - " metadata={\"source\": \"flan-t5-large\"}\n", + " fields={\n", + " \"query\": question,\n", + " \"retrieved_document_1\": context[0],\n", + " \"retrieved_document_2\": context[1],\n", + " \"retrieved_document_3\": context[2],\n", + " },\n", + " metadata={\"source\": \"flan-t5-large\"},\n", " )\n", " for question, context in tqdm(zip(questions, contexts))\n", "]\n", @@ -554,7 +570,9 @@ ], "source": [ "# Add the new metadata property\n", - "metadata = rg.TermsMetadataProperty(name=\"sources\", title=\"Model sources\", values=[\"flan-t5-large/flan-t5-large\"])\n", + "metadata = rg.TermsMetadataProperty(\n", + " name=\"sources\", title=\"Model sources\", values=[\"flan-t5-large/flan-t5-large\"]\n", + ")\n", "\n", "dataset_ssim.add_metadata_property(metadata)" ] @@ -572,7 +590,7 @@ " for j in range(i + 1, len(context)):\n", " record = rg.FeedbackRecord(\n", " fields={\"sentence1\": context[i], \"sentence2\": context[j]},\n", - " metadata={\"sources\": \"flan-t5-large/flan-t5-large\"}\n", + " metadata={\"sources\": \"flan-t5-large/flan-t5-large\"},\n", " )\n", " records.append(record)\n", "\n", @@ -650,11 +668,13 @@ "source": [ "# Define the training task using the formatting function\n", "def formatting_func(sample):\n", - "\n", " records = []\n", "\n", " for i in range(1, 
4):\n", - " record = {\"sentence-1\": sample[\"query\"], \"sentence-2\": sample[f\"retrieved_document_{i}\"]}\n", + " record = {\n", + " \"sentence-1\": sample[\"query\"],\n", + " \"sentence-2\": sample[f\"retrieved_document_{i}\"],\n", + " }\n", " values = [resp[\"value\"] for resp in sample[f\"rating_retrieved_document_{i}\"]]\n", " label = int(values[0])\n", " record[\"label\"] = label\n", @@ -662,6 +682,7 @@ "\n", " return records\n", "\n", + "\n", "task = TrainingTask.for_sentence_similarity(formatting_func=formatting_func)" ] }, @@ -676,7 +697,7 @@ " dataset=dataset_rag,\n", " task=task,\n", " framework=\"sentence-transformers\",\n", - " framework_kwargs={\"cross_encoder\": False}\n", + " framework_kwargs={\"cross_encoder\": False},\n", ")\n", "trainer_bi.train(output_dir=\"my_bi_sentence_transformer_model\")" ] @@ -717,8 +738,11 @@ "source": [ "# Define the training task\n", "task = TrainingTask.for_sentence_similarity(\n", - " texts=[dataset_ssim.field_by_name(\"sentence-1\"), dataset_ssim.field_by_name(\"sentence-2\")],\n", - " label=dataset_ssim.question_by_name(\"similarity\")\n", + " texts=[\n", + " dataset_ssim.field_by_name(\"sentence-1\"),\n", + " dataset_ssim.field_by_name(\"sentence-2\"),\n", + " ],\n", + " label=dataset_ssim.question_by_name(\"similarity\"),\n", ")" ] }, @@ -733,7 +757,7 @@ " dataset=dataset_ssim,\n", " task=task,\n", " framework=\"sentence-transformers\",\n", - " framework_kwargs={\"cross_encoder\": True}\n", + " framework_kwargs={\"cross_encoder\": True},\n", ")\n", "trainer_cross.train(output_dir=\"my_cross_sentence_transformer_model\")" ] @@ -788,7 +812,9 @@ "outputs": [], "source": [ "# Initialize the SentenceTransformersRanker with out model\n", - "ranker = SentenceTransformersRanker(model_name_or_path=\"my_cross_sentence_transformer_model\")" + "ranker = SentenceTransformersRanker(\n", + " model_name_or_path=\"my_cross_sentence_transformer_model\"\n", + ")" ] }, { @@ -810,11 +836,13 @@ " Provide a clear and concise 
response.\n", " Your answer should be in your own words and be no longer than 50 words.\n", " \\n\\n Related text: {join(documents, delimiter=new_line, pattern=new_line+'Document[$idx]: $content', str_replace={new_line: ' ', '[': '(', ']': ')'})} \\n Question: {query}; Answer: \"\"\",\n", - " output_parser=AnswerParser(reference_pattern=r\"Document\\[(\\d+)\\]\"),\n", + " output_parser=AnswerParser(reference_pattern=r\"Document\\[(\\d+)\\]\"),\n", ")\n", "\n", "# Initialize PromptNode\n", - "prompt_node = PromptNode(model_name_or_path=\"google/flan-t5-large\", default_prompt_template=rag_prompt)" + "prompt_node = PromptNode(\n", + " model_name_or_path=\"google/flan-t5-large\", default_prompt_template=rag_prompt\n", + ")" ] }, { @@ -840,17 +868,16 @@ "questions = dataset[\"train\"][\"question\"]\n", "answers = []\n", "contexts = []\n", - " \n", + "\n", "for question in tqdm(questions):\n", - " \n", " # Get the response and save it\n", " response = pipe.run(query=question)\n", " answers.append(response[\"answers\"][0].answer)\n", - " \n", + "\n", " # Get the document context and save it\n", - " prompt = response[\"answers\"][0].meta['prompt']\n", - " segments = re.split(r'Document\\[\\d+\\]:', prompt)\n", - " document_segments = [segment.strip() for segment in segments[1:]] \n", + " prompt = response[\"answers\"][0].meta[\"prompt\"]\n", + " segments = re.split(r\"Document\\[\\d+\\]:\", prompt)\n", + " document_segments = [segment.strip() for segment in segments[1:]]\n", " contexts.append(document_segments)" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-langchain-syntethic.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-langchain-syntethic.ipynb index ba95bdafc8..2b1061fe7d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-langchain-syntethic.ipynb +++ 
b/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-langchain-syntethic.ipynb @@ -1,2874 +1,2895 @@ { - "cells": [ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🎡 Create synthetic data and annotations with LLMs\n", + "\n", + "LLMs are diverse and can be used for many different tasks. Besides cool chat interactions, LLMs can be powerful tools for creating synthetic data and providing initial suggestions for labelling tasks for which you don't have any data yet. This way anyone can easily get a head start on bootstrapping a project.\n", + "\n", + "In this example, we will demonstrate how to use different LLM tools, like `openai`, `transformers`, `langchain` and `outlines`, to create synthetic data and we can leverage those same LLMs for providing initial annotation or suggestions.\n", + "\n", + "If you want a more basic introduction to synthetic data with our `ArgillaCallbackHandler` for `langchain`, you can take a look at [this practical guide](../../../tutorials_and_integrations/integrations/use_argilla_callback_in_langchain.md).\n", + "\n", + "
\n", + "\n", + "Warning\n", + "\n", + "Do keep in mind that LLMs have licenses and not every LLM can be used for creating synthetic data in every operational setting. Please check the license of the LLM you are using before using it for creating synthetic data.\n", + "\n", + "
\n", + "\n", + "Let's get started!\n", + "\n", + "![Feedback Task dataset with synthetic data and suggestions from LLMs](/_static/images/llms/labelling-feedback-langchain-syntethic/synthetic-data.png)\n", + "\n", + "
\n", + "\n", + "Note \n", + "\n", + "This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", + "\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. If you don't have one already, check out our [Quickstart](../../../getting_started/quickstart.md) or [Installation](../../../getting_started/quickstart_installation.ipynb) pages. Once you do, complete the following steps:\n", + "\n", + "1. Install the Argilla client and the required third-party libraries using `pip`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yN2atS0RE2pF" + }, + "outputs": [], + "source": [ + "!pip install argilla openai langchain outlines tiktoken transformers ipywidgets jupyter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Let's make the necessary imports:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "POQgkfrWEg1u" + }, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "import os\n", + "import random\n", + "\n", + "from langchain import OpenAI, PromptTemplate, LLMChain\n", + "from langchain.output_parsers import CommaSeparatedListOutputParser\n", + "from outlines import models, text\n", + "from outlines.text import generate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. 
If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# Replace api_key if you configured a custom API key\n", + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# import os\n", + "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"admin.apikey\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "4. We also need to set your OpenAI API credentials by [creating an API key](https://platform.openai.com/docs/quickstart/add-your-api-key) and defining the `OPENAI_API_KEY` environment variable." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. 
For more info, please check out the [Telemetry](../../reference/telemetry.md) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define a FeedbackDataset" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we will create a synthetic dataset for a banking customer care scenario. We assume that customers will write `text` requests. These requests should then be classified for `sentiment` and `topics`. The `topics` will be a multi-label classification and can be used to route the request to the correct department. The `sentiment` will be used using a single-label classification to determine if the request needs to be handled with priority." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "0UsoG5OtE11w" + }, + "outputs": [], + "source": [ + "sentiment = [\"positive\", \"neutral\", \"negative\"]\n", + "topic = [\"new_card\", \"mortgage\", \"application\", \"payments\"]\n", + "\n", + "dataset = rg.FeedbackDataset(\n", + " fields=[rg.TextField(name=\"text\")],\n", + " questions=[\n", + " rg.LabelQuestion(\n", + " name=\"sentiment\",\n", + " title=\"What is the sentiment of the message?\",\n", + " labels=sentiment,\n", + " ),\n", + " rg.MultiLabelQuestion(\n", + " name=\"topics\",\n", + " title=\"Select the topic(s) of the message?\",\n", + " labels=topic,\n", + " visible_labels=4,\n", + " ),\n", + " ],\n", + " guidelines=(\n", + " \"This dataset contains messages from a bank's customer support chatbot. 
\"\n", + " \"The goal is to label the sentiment and topics of the messages.\"\n", + " ),\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create synthetic data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use LLMs to generate the synthetic data for each step of the NLP task. First, we will create `text` requests from customers for a bank. Next, we will create input for the `LabelQuestion` to assess the sentiment of the requests and lastly, we will create input for the `MultiLabelQuestion` to classify the requests.\n", + "\n", + "We will do this using [OpenAI](https://github.com/openai/openai-python) models in combination with the [LangChain](https://github.com/langchain-ai/langchain) and open-source transformer-based models in combination with [Outlines](https://github.com/normal-computing/outlines) packages.\n", + "\n", + " | The LangChain framework is a wrapper around LLM models that allows for easier data-aware and agent-based LLM models. \n", + "\n", + " | Outlines is a Python library to write reliable programs for conditional generation during interactions with generative models.\n", + " \n", + "
\n", + "\n", + "Note \n", + "\n", + "The process of prompt engineering is a trial-and-error process. Changes somewhere might result in undesirable effects in another place in the language chain. The examples below are just a starting point and can be improved by experimenting with different prompts and templates.\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize Generative model\n", + "\n", + "#### LangChain with OpenAI\n", + "\n", + "For the usage of LangChain you need to pass the `OPENAI_API_KEY` environment variable to the `OpenAI` class. You can do this by using the `os` package. The `openai_model` variable is then ready to use in the examples below." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", + "openai_model = OpenAI(api_key=os.environ.get(\"OPENAI_API_KEY\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Outlines with Transformers\n", + "\n", + "Even though Outlines does provide some support for OpenAI, we will use basic transformers for this example. You can use any generative model from the [HuggingFace model hub](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending) by passing the name of the model to the `transformers` function. The `transformer_model` variable is then ready to use in the examples below." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "transformer_model = models.transformers(\"gpt2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `TextField`\n", + "\n", + "For creating a review, we rely on free text generation based on a prompt. This should be good enough for our purposes of creating a synthetic dataset as well as keeping the process as simple as possible." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### LangChain\n", + "\n", + "OpenAI models have been instruction-tuned and can thus be used via LangChain to generate synthetic data. This is done using a `PromptTemplate` that infers information from `topic` and `sentiment` variables that are passed to the `predict()` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 🎡 Create synthetic data and annotations with LLMs\n", - "\n", - "LLMs are diverse and can be used for many different tasks. Besides cool chat interactions, LLMs can be powerful tools for creating synthetic data and providing initial suggestions for labelling tasks for which you don't have any data yet. This way anyone can easily get a head start on bootstrapping a project.\n", - "\n", - "In this example, we will demonstrate how to use different LLM tools, like `openai`, `transformers`, `langchain` and `outlines`, to create synthetic data and we can leverage those same LLMs for providing initial annotation or suggestions.\n", - "\n", - "If you want a more basic introduction to synthetic data with our `ArgillaCallbackHandler` for `langchain`, you can take a look at [this practical guide](../../../tutorials_and_integrations/integrations/use_argilla_callback_in_langchain.md).\n", - "\n", - "
\n", - "\n", - "Warning\n", - "\n", - "Do keep in mind that LLMs have licenses and not every LLM can be used for creating synthetic data in every operational setting. Please check the license of the LLM you are using before using it for creating synthetic data.\n", - "\n", - "
\n", - "\n", - "Let's get started!\n", - "\n", - "![Feedback Task dataset with synthetic data and suggestions from LLMs](/_static/images/llms/labelling-feedback-langchain-syntethic/synthetic-data.png)\n", - "\n", - "
\n", - "\n", - "Note \n", - "\n", - "This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", - "\n", - "
\n" + "data": { + "text/plain": [ + "'I recently had the pleasure of working with the mortgage team at this bank, and I can confidently say that their level of service and expertise was second to none. They answered all of my questions quickly and took the time to explain the process to me in detail. I felt like they genuinely had my best interests at heart and they made the process of obtaining a mortgage as smooth and stress-free as possible. I would highly recommend this bank for anyone looking to take out a mortgage.'" ] - }, + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "template = (\n", + " \"Write a customer review for a bank. \"\n", + " \"Do that for topic of {topic}. \"\n", + " \"Do that with one a {sentiment} sentiment.\"\n", + ")\n", + "prompt = PromptTemplate(template=template, input_variables=[\"topic\", \"sentiment\"])\n", + "llm_chain_review = LLMChain(prompt=prompt, llm=openai_model)\n", + "\n", + "\n", + "def generate_langchain_review():\n", + " return llm_chain_review.predict(\n", + " topic=random.choice(topic), sentiment=random.choice(sentiment)\n", + " ).strip()\n", + "\n", + "\n", + "generate_langchain_review()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will now create a function that can generate `n`-random examples to evaluate the performance. As we can expect from the recent generation of OpenAI models, the results look good and seem diverse enough to be used as synthetic data." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "For this tutorial, you will need to have an Argilla server running. If you don't have one already, check out our [Quickstart](../../../getting_started/quickstart.md) or [Installation](../../../getting_started/quickstart_installation.ipynb) pages. 
Once you do, complete the following steps:\n", - "\n", - "1. Install the Argilla client and the required third-party libraries using `pip`:" + "data": { + "text/plain": [ + "[\"I've been a customer of this bank for over 5 years, and I've been completely satisfied with their payment services. The online payment system is easy to use and the customer service team is always quick to respond to any questions I have. I never have to worry about my payments being delayed or lost, which is always reassuring. Highly recommend this bank for anyone looking for reliable payment services!\",\n", + " \"I recently secured a mortgage with this bank and was so impressed with the level of service I received. From the start, the staff was friendly, knowledgeable, and willing to go above and beyond to get me the best deal. The process was straightforward and the paperwork was easy to understand. I'm thrilled with my new mortgage and would highly recommend this bank to anyone looking for a mortgage.\"]" ] - }, + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def generate_n_langchain_reviews(n=2):\n", + " reviews = []\n", + " for n in range(n):\n", + " reviews.append(generate_langchain_review())\n", + " return reviews\n", + "\n", + "\n", + "langchain_reviews = generate_n_langchain_reviews()\n", + "langchain_reviews" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Outlines\n", + "\n", + "Not all generative models are instruction tuned and as useful as modern-day LLMs. So take into account that this should be reflected in your `prompt` and the expected quality of the generated text. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yN2atS0RE2pF" - }, - "outputs": [], - "source": [ - "!pip install argilla openai langchain outlines tiktoken transformers ipywidgets jupyter" + "data": { + "text/plain": [ + "\"Because of technical questions and consumer protection. Telephone orders are not placed in the bank's database because where homeowners need to know that their bank is registered here, this protection providing a system to check their record is not protected by even their own state laws, which is why I don't believe criminal laws should be used to enforce the bank ATM login, nor should neutral other town or guild banking providers be regulated. These accountants have followed the local law explanations, and they do not deserve criminal sanction for allowing a\"" ] - }, + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@text.prompt\n", + "def generator(topic, sentiment):\n", + " \"\"\"\n", + " The customer service for {{ topic }} of the bank is {{ sentiment }} because\n", + " \"\"\"\n", + "\n", + "\n", + "def generate_outlines_review():\n", + " prompt = generator(random.choice(topic), random.choice(sentiment))\n", + " answer = text.generate.continuation(transformer_model, max_tokens=100)(prompt)\n", + " answer = \"Because\" + answer\n", + " return answer\n", + "\n", + "\n", + "generate_outlines_review()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will now create a function that can generate `n`-random examples. Looking at the examples, the model seems to generate roughly related texts but in general, the quality proves poorer. It can therefore be recommended to use another type of models, which might be instruction tuned to ensure a higher quality generation. 
Additionally, Outlines offers more dynamic control over the generation process, which might be used to improve the quality of the generated text too." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2. Let's make the necessary imports:" + "data": { + "text/plain": [ + "['Because of damaged card and adds some other attachments from other data on the ToS or database file.\"',\n", + " 'Because of jurisdictional issues,\" said the FTC\\'s executive director, Paul R. Matthewi. \"Technology seems to be without limits in the fraud marketplace as we moved toward a new way of remote law enforcement and convenience.\"\\n\\nIt is unclear, however, how people will get paid—or how many will be affected. In TekSavvy, which relies on similar technology many consumer goods companies rely on to keep their customers healthy, some of America\\'s top credit card holders appear to still need money,']" ] - }, + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def generate_n_outlines_reviews(n=2):\n", + " reviews = []\n", + " for n in range(n):\n", + " reviews.append(generate_outlines_review())\n", + " return reviews\n", + "\n", + "\n", + "outlines_reviews = generate_n_outlines_reviews()\n", + "outlines_reviews" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `LabelQuestion`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this step, we will re-use the generated reviews from `langchain_reviews` and `outlines_reviews` and label their sentiment using the respective frameworks. This will be done by assuming a `str` to be returned from both of the lists of `sentiment` defined above." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### LangChain\n", + "\n", + "We are using a jinja-like `template`, which requires us to define the basic `prompt` as an `input_variable` for LangChain. For the initial example, we are using a demo." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "POQgkfrWEg1u" - }, - "outputs": [], - "source": [ - "import argilla as rg\n", - "\n", - "import os\n", - "import random\n", - "\n", - "from langchain import OpenAI, PromptTemplate, LLMChain\n", - "from langchain.output_parsers import CommaSeparatedListOutputParser\n", - "from outlines import models, text\n", - "from outlines.text import generate" + "data": { + "text/plain": [ + "'positive'" ] - }, + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "template = (\n", + " f\"Choose from the sentiments: {sentiment}. Return a single sentiment.{{prompt}}\"\n", + ")\n", + "prompt = PromptTemplate(template=template, input_variables=[\"prompt\"])\n", + "llm_chain_sentiment = LLMChain(prompt=prompt, llm=openai_model)\n", + "\n", + "\n", + "def get_sentiment_from_langchain(text: str) -> str:\n", + " return llm_chain_sentiment.predict(prompt=text).strip().lower()\n", + "\n", + "\n", + "get_sentiment_from_langchain(\"I love langchain and openai for sentiment labelling.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We get the sentiment labels for the generated `langchain_reviews`. We can see that the sentiment labels are not always correct, but they are mostly correct. This is because the LLMs are not perfect, but they are good enough to be used for synthetic data generation and providing suggestions for human annotators." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "3. If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + "data": { + "text/plain": [ + "['positive', 'positive']" ] - }, + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "langchain_sentiment = [\n", + " get_sentiment_from_langchain(reviews) for reviews in langchain_reviews\n", + "]\n", + "langchain_sentiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Outlines\n", + "\n", + "Outlines provides an out-of-the-box implementation for guided labeling with generative models; however, in some cases [(zero-shot) classification models from the HuggingFace library](https://huggingface.co/models?pipeline_tag=text-classification&sort=trending) can be used to provide a good starting point for providing suggestions during a labeling process too. Take a look at our [example with SetFit](labelling-feedback-setfit.ipynb)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", - "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "data": { + "text/plain": [ + "'positive'" ] - }, + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_sentiment_from_outlines(text: Union[str, list]) -> str:\n", + " return generate.choice(transformer_model, sentiment)(text)\n", + "\n", + "\n", + "get_sentiment_from_outlines(\"I love outlines and transformers for sentiment labelling.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the `choice`-methods with a list of strings too." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + "data": { + "text/plain": [ + "['neutral', 'positive']" ] - }, + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outlines_sentiment = get_sentiment_from_outlines(outlines_reviews)\n", + "outlines_sentiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `MultiLabelQuestion`\n", + "\n", + "For this step, we will re-use the generated reviews from `langchain_reviews` and `outlines_reviews` and label their topics as part of a multi-label classification problem."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### LangChain\n", + "\n", + "Note that we are now using an output parser as a post-processing step for the returned output. We do this to ensure that we can obtain a `List[str]`. We will use the built-in `CommaSeparatedListOutputParser`, which splits strings by comma and returns a list of strings as output. And we are using the jinja-like templating in a similar way as with the `LabelQuestion`." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Set the HF_TOKEN environment variable\n", - "# import os\n", - "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", - "\n", - "# # Replace api_url with the url to your HF Spaces URL\n", - "# # Replace api_key if you configured a custom API key\n", - "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", - "# api_key=\"admin.apikey\",\n", - "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", - "# )" + "data": { + "text/plain": [ + "['new_card', 'mortgage', 'application', 'payments']" ] - }, + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output_parser = CommaSeparatedListOutputParser()\n", + "template = (\n", + " f\"Classify the text as the following topics: {topic}. \"\n", + " \"Return zero or more topics as a comma-separated list. If zero return an empty string. 
\"\n", + " \"{prompt}\"\n", + ")\n", + "prompt = PromptTemplate(\n", + " template=template, input_variables=[\"prompt\"], output_parser=output_parser\n", + ")\n", + "llm_chain_topics = LLMChain(\n", + " prompt=prompt, llm=openai_model, output_parser=output_parser\n", + ")\n", + "\n", + "\n", + "def get_topics_from_langchain(text: str) -> str:\n", + " return [\n", + " topic.lower() for topic in llm_chain_topics.predict(prompt=text) if topic != \"\"\n", + " ]\n", + "\n", + "\n", + "get_topics_from_langchain(\n", + " f\"I love extracting {topic} with and openai and langchain for topic labelling.\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We get the topic labels for the generated `langchain_reviews`." + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "4. We also need to set your OpenAI API credentials by [creating an API key](https://platform.openai.com/docs/quickstart/add-your-api-key) and setting defining the `OPENAI_API_KEY` environment variable." + "data": { + "text/plain": [ + "[['new_card'], ['mortgage', 'application']]" ] - }, + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "langchain_topics = [get_topics_from_langchain(review) for review in langchain_reviews]\n", + "langchain_topics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Outlines\n", + "\n", + "Outlines does not have a direct way to generate data from choices but we are able to leverage their `Pydantic` integration to generate a `json` schema. Note that this is a hacky way to facilitate this guided generation and is not officially mentioned in [the paper behind the outlines package](https://arxiv.org/pdf/2307.09702.pdf). 
\n", + "\n", + "Additionally, the use of `json` requires `pydantic>=2`.\n", + "\n", + "```python\n", + "# DEMO CODE\n", + "class Topic(BaseModel):\n", + " new_card: bool = False\n", + " mortgage: bool = False\n", + " application: bool = False\n", + " payments: bool = False\n", + "\n", + "def get_topics_from_outlines(text: str) -> str:\n", + " topics = []\n", + " json_data = generate.json(transformer_model, Topic)(langchain_reviews[0])\n", + " for key, value in json_data.items():\n", + " if value:\n", + " topics.append(key)\n", + " return topics\n", + "\n", + "get_topics_from_outlines(f\"I love extracting {topic} with and outlines and transformers for topic labelling.\")\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create synthetic records" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have our synthetic data and predictions, we can use them to create Argilla records. We will create completely artificial records from the `text` for the `TextField` and we will assign the `sentiment` and `topics` as model suggestions for the `LabelQuestion` and `MultiLabelQuestion`, respectively. These suggestions will help the annotators to label the data faster and more accurately, but instead of using them as suggestions, you would also be able to apply them as [annotated responses](../../../practical_guides/create_update_dataset/suggestions_and_responses.md) directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For demo purposes, we will only create records with synthetic data obtained from `langchain`." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Enable Telemetry\n", - "\n", - "We gain valuable insights from how you interact with our tutorials. 
To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." + "data": { + "text/plain": [ + "FeedbackRecord(fields={'text': '\\n\\nI recently applied for a mortgage at this bank and the process was simple and straightforward. The customer service team was helpful and knowledgeable, and their rates were competitive. Overall, I had a good experience.'}, metadata={}, responses=[], suggestions=(SuggestionSchema(question_id=None, question_name='sentiment', type=None, score=None, value='positive', agent=None), SuggestionSchema(question_id=None, question_name='topics', type=None, score=None, value=['mortgage', 'application'], agent=None)), external_id=None)" ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def create_synthetic_record():\n", + " review = generate_langchain_review()\n", + " record = rg.FeedbackRecord(\n", + " fields={\n", + " \"text\": review,\n", + " }\n", + " )\n", + " sentiment = get_sentiment_from_langchain(review)\n", + " topics = get_topics_from_langchain(review)\n", + " record.update(\n", + " suggestions=[\n", + " {\"question_name\": \"sentiment\", \"value\": sentiment},\n", + " {\"question_name\": \"topics\", \"value\": topics},\n", + " ]\n", + " )\n", + " return record\n", + "\n", + "\n", + "record = create_synthetic_record()\n", + "record" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will then add the synthetic `record` to the dataset, and upload the model data and dataset to the Argilla server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.add_records([record])\n", + "remote_dataset = 
dataset.push_to_argilla(name=\"synthetic-data\", workspace=\"argilla\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In this tutorial, we have covered how to create synthetic data using OpenAI and LangChain, or Transformers and Outlines. We have highlighted some caveats to synthetic data generation when it comes to prompt engineering. Finally, we've shown how to use this synthesized data as input and suggestions for Argilla records.\n", + "\n", + "To learn more about LLMs, LangChain and OpenAI check out these links:\n", + "\n", + "- [Outlines](https://github.com/normal-computing/outlines)\n", + "- [LangChain](https://github.com/langchain-ai/langchain)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "argilla", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "vscode": { + "interpreter": { + "hash": "2d98cb9bf90a932b5bf8e86e91214497eb0e38eb318595fbd6fbd5460fe92036" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "04150cf7e9a74a04aafa94d394553630": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " from argilla.utils.telemetry 
import tutorial_running\n", - " tutorial_running()\n", - "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" - ] + "0447a98b5dfe42c899273b9c37bdadad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define a FeedbackDataset" - ] + "0c010df989eb497c810a6f960c6ea41b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this example, we will create a synthetic dataset for a banking customer care scenario. We assume that customers will write `text` requests. These requests should then be classified for `sentiment` and `topics`. The `topics` will be a multi-label classification and can be used to route the request to the correct department. The `sentiment` will be used using a single-label classification to determine if the request needs to be handled with priority." 
- ] + "0d7acd8e1a394336aa146e2a442f672c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "id": "0UsoG5OtE11w" - }, - "outputs": [], - "source": [ - "sentiment = [\"positive\", \"neutral\", \"negative\"]\n", - "topic = [\"new_card\", \"mortgage\", \"application\", \"payments\"]\n", - "\n", - "dataset = rg.FeedbackDataset(\n", - " fields = [rg.TextField(name=\"text\")],\n", - " questions = [\n", - " rg.LabelQuestion(\n", - " name=\"sentiment\",\n", - " title=\"What is the sentiment of the message?\",\n", - " labels=sentiment\n", - " ),\n", - " rg.MultiLabelQuestion(\n", - " name=\"topics\",\n", - " title=\"Select the topic(s) of the message?\",\n", - " labels=topic,\n", - " visible_labels=4\n", - " )\n", - " ],\n", - " guidelines=(\n", - " \"This dataset contains 
messages from a bank's customer support chatbot. \"\n", - " \"The goal is to label the sentiment and topics of the messages.\"\n", - " )\n", - ")" - ] + "16993356757e4ee5b7f8042d58c96e17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create synthetic data" - ] + "16d42bc00dfe4467a1da86b1d2391d0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, 
- "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will use LLMs to generate the synthetic data for each step of the NLP task. First, we will create `text` requests from customers for a bank. Next, we will create input for the `LabelQuestion` to assess the sentiment of the requests and lastly, we will create input for the `MultiLabelQuestion` to classify the requests.\n", - "\n", - "We will do this using [OpenAI](https://github.com/openai/openai-python) models in combination with the [LangChain](https://github.com/langchain-ai/langchain) and open-source transformer-based models in combination with [Outlines](https://github.com/normal-computing/outlines) packages.\n", - "\n", - " | The LangChain framework is a wrapper around LLM models that allows for easier data-aware and agent-based LLM models. \n", - "\n", - " | Outlines is a Python library to write reliable programs for conditional generation during interactions with generative models.\n", - " \n", - "
\n", - "\n", - "Note \n", - "\n", - "The process of prompt engineering is a trial-and-error process. Changes somewhere might result in undesirable effects in another place in the language chain. The examples below are just a starting point and can be improved by experimenting with different prompts and templates.\n", - "\n", - "
" - ] + "170a2ee20ab64a9b86db65549a5d4063": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0d7acd8e1a394336aa146e2a442f672c", + "placeholder": "​", + "style": "IPY_MODEL_3e6c2b50b3084d23b575585c288f087e", + "value": "Generating Training Pairs: 100%" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialize Generative model\n", - "\n", - "#### LangChain with OpenAI\n", - "\n", - "For the usage of LangChain you need to pass the `OPENAI_API_KEY` environment variable to the `OpenAI` class. You can do this by using the `os` package. The `model` variable is then ready to use in the examples below." 
- ] + "186f82d150994ac7914d0646fb5ff425": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", - "openai_model = OpenAI(api_key=os.environ.get(\"OPENAI_API_KEY\"))" - ] + "1d58b40ad6a54c25bd451eda4e7d8069": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": 
null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Outlines with Transformers\n", - "\n", - "Even though Outlines does provide some support for OpenAI, we will use basic transformers for this example. You can use any generative model from the [HuggingFace model hub](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending) by passing the name of the model to the `transformers` function. The `model` variable is then ready to use in the examples below." 
- ] + "1ff157a9c8974b07ae97cb115c8d0188": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "transformer_model = models.transformers(\"gpt2\")" - ] + "217760080e494d2d9b0582910d121a28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `TextField`\n", - "\n", - "For creating a review, we rely on free text generation based on a prompt. This should be good enough for our purposes of creating a synthetic dataset as well as keeping the process as simple as possible." 
- ] + "242f97eb0f0d4ab1830c62686127b717": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5057f8b8144d41ff9d8b82b8602570fc", + "placeholder": "​", + "style": "IPY_MODEL_369bc409052a48f7ac2182715406abef", + "value": "Iteration: 100%" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### LangChain\n", - "\n", - "OpenAI models have been instruction-tuned and can thus be used via LangChain to generate synthetic data. This is done using a `PrompTemplate` that infers information from `topic` and `sentiment` variables that are passed to the `predict()` method." 
- ] + "255d62fb39454098ab3701753d8d67d6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_77bd2b1f5e57441ab729c6e517279834", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bc0c58d9d798437fb1d40277d8777777", + "value": 1 + } }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'I recently had the pleasure of working with the mortgage team at this bank, and I can confidently say that their level of service and expertise was second to none. They answered all of my questions quickly and took the time to explain the process to me in detail. I felt like they genuinely had my best interests at heart and they made the process of obtaining a mortgage as smooth and stress-free as possible. I would highly recommend this bank for anyone looking to take out a mortgage.'" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "template = (\n", - " \"Write a customer review for a bank. \"\n", - " \"Do that for topic of {topic}. 
\"\n", - " \"Do that with one a {sentiment} sentiment.\"\n", - ")\n", - "prompt = PromptTemplate(template=template, input_variables=[\"topic\", \"sentiment\"])\n", - "llm_chain_review = LLMChain(prompt=prompt, llm=openai_model)\n", - "\n", - "def generate_langchain_review():\n", - " return llm_chain_review.predict(\n", - " topic=random.choice(topic), \n", - " sentiment=random.choice(sentiment)\n", - " ).strip()\n", - " \n", - "generate_langchain_review()" - ] + "25f9bca647f44645b85a644f03807095": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fa5df54e161e40dbbb21ed96c879444e", + "placeholder": "​", + "style": "IPY_MODEL_16993356757e4ee5b7f8042d58c96e17", + "value": " 1/1 [01:28<00:00, 88.63s/it]" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will now create a function that can generate `n`-random examples to evaluate the performance. As we can expect from the recent generation of OpenAI models, the results look good and seem diverse enough to be used as synthetic data." 
- ] + "28c40914eac34bcba0c9eb4dac6b0032": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\"I've been a customer of this bank for over 5 years, and I've been completely satisfied with their payment services. The online payment system is easy to use and the customer service team is always quick to respond to any questions I have. I never have to worry about my payments being delayed or lost, which is always reassuring. Highly recommend this bank for anyone looking for reliable payment services!\",\n", - " \"I recently secured a mortgage with this bank and was so impressed with the level of service I received. From the start, the staff was friendly, knowledgeable, and willing to go above and beyond to get me the best deal. The process was straightforward and the paperwork was easy to understand. 
I'm thrilled with my new mortgage and would highly recommend this bank to anyone looking for a mortgage.\"]" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def generate_n_langchain_reviews(n=2):\n", - " reviews = []\n", - " for n in range(n):\n", - " reviews.append(generate_langchain_review())\n", - " return reviews\n", - "\n", - "langchain_reviews = generate_n_langchain_reviews()\n", - "langchain_reviews" - ] + "2f271b0778974646aaff691227336e91": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_16d42bc00dfe4467a1da86b1d2391d0d", + "placeholder": "​", + "style": "IPY_MODEL_0447a98b5dfe42c899273b9c37bdadad", + "value": " 20/20 [00:00<00:00, 391.01it/s]" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Outlines\n", - "\n", - "Not all generative models are instruction tuned and as useful as modern-day LLMs. So take into account that this should be reflected in your `prompt` and the expected quality of the generated text. 
" - ] + "369bc409052a48f7ac2182715406abef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"Because of technical questions and consumer protection. Telephone orders are not placed in the bank's database because where homeowners need to know that their bank is registered here, this protection providing a system to check their record is not protected by even their own state laws, which is why I don't believe criminal laws should be used to enforce the bank ATM login, nor should neutral other town or guild banking providers be regulated. 
These accountants have followed the local law explanations, and they do not deserve criminal sanction for allowing a\"" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "@text.prompt\n", - "def generator(topic, sentiment):\n", - " \"\"\"\n", - " The customer service for {{ topic }} of the bank is {{ sentiment }} because\n", - " \"\"\"\n", - "\n", - "def generate_outlines_review():\n", - " prompt = generator(random.choice(topic), random.choice(sentiment))\n", - " answer = text.generate.continuation(transformer_model, max_tokens=100)(prompt)\n", - " answer = \"Because\"+ answer\n", - " return answer\n", - "\n", - "generate_outlines_review()" - ] + "36b99521f8274a639abb90eb0040d6c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_70a57ad580f847d3bd3123cfe1539305", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0c010df989eb497c810a6f960c6ea41b", + "value": 1 + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will now create a function that can generate `n`-random examples. Looking at the examples, the model seems to generate roughly related texts but in general, the quality proves poorer. It can therefore be recommended to use another type of models, which might be instruction tuned to ensure a higher quality generation. Additionally, Outlines offers more dynamic control over the generation process, which might be used to improve the quality of the generated text too." 
- ] + "379907416f504f05906454e482da2eaf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Because of damaged card and adds some other attachments from other data on the ToS or database file.\"',\n", - " 'Because of jurisdictional issues,\" said the FTC\\'s executive director, Paul R. Matthewi. \"Technology seems to be without limits in the fraud marketplace as we moved toward a new way of remote law enforcement and convenience.\"\\n\\nIt is unclear, however, how people will get paid—or how many will be affected. 
In TekSavvy, which relies on similar technology many consumer goods companies rely on to keep their customers healthy, some of America\\'s top credit card holders appear to still need money,']" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } + "38bfdddef0444c0baf9d29248689f846": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3f5aed26eeef4182b360085d83ae795d", + "IPY_MODEL_255d62fb39454098ab3701753d8d67d6", + "IPY_MODEL_25f9bca647f44645b85a644f03807095" ], - "source": [ - "def generate_n_outlines_reviews(n=2):\n", - " reviews = []\n", - " for n in range(n):\n", - " reviews.append(generate_outlines_review())\n", - " return reviews\n", - "\n", - "outlines_reviews = generate_n_outlines_reviews()\n", - "outlines_reviews" - ] + "layout": "IPY_MODEL_ae7fc579502e46f7861e402580586b28" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `LabelQuestion`" - ] + "3d282336f5c3425386a417866f367007": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For this step, we will re-use the generated reviews from `langchain_reviews` and `outlines_reviews` and label their sentiment using the 
respective frameworks. This will be done by assuming a `str` to be returned from both of the lists of `sentiment` defined above." - ] + "3e622eeea5df47d6a21e015f3e742fa8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### LangChain\n", - "\n", - "We are using a jinja-like `template`, which requires us to define the basic `prompt` as an `input_variable` for LangChain. For the initial example, we are using a demo." 
- ] + "3e6c2b50b3084d23b575585c288f087e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'positive'" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "template = (\n", - " f'Choose from the sentiments: {sentiment}. '\n", - " 'Return a single sentiment.'\n", - " '{prompt}'\n", - ")\n", - "prompt = PromptTemplate(template=template, input_variables=[\"prompt\"])\n", - "llm_chain_sentiment = LLMChain(prompt=prompt, llm=openai_model)\n", - "\n", - "def get_sentiment_from_langchain(text: str) -> str:\n", - " return llm_chain_sentiment.predict(prompt=text).strip().lower()\n", - "\n", - "get_sentiment_from_langchain(\"I love langchain and openai for sentiment labelling.\")" - ] + "3f5aed26eeef4182b360085d83ae795d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6143886f7acc4591ae5f79ce6f67af4a", + "placeholder": "​", + "style": "IPY_MODEL_486c1a817552432c8fb20e59d0a3f079", + "value": "Epoch: 100%" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We get the sentiment labels for the 
generated `langchain_reviews`. We can see that the sentiment labels are not always correct, but they are mostly correct. This is because the LLMs are not perfect, but they are good enough to be used for synthetic data generation and providing suggestions for human annotators." - ] + "3fd94ef662db4fff9dde61455b41faf1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_186f82d150994ac7914d0646fb5ff425", + "placeholder": "​", + "style": "IPY_MODEL_379907416f504f05906454e482da2eaf", + "value": " 1/1 [00:02<00:00, 2.63s/it]" + } }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['positive', 'positive']" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } + "411de4b297fe4a09acb70951c9f36b82": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c2eac9934f5b407c8e424ee2da9eea58", + "IPY_MODEL_36b99521f8274a639abb90eb0040d6c0", + "IPY_MODEL_3fd94ef662db4fff9dde61455b41faf1" ], - "source": [ - "langchain_sentiment = [get_sentiment_from_langchain(reviews) for reviews in langchain_reviews]\n", - "langchain_sentiment" - ] + "layout": 
"IPY_MODEL_d6283b2cf69d45f694633ae1544d47a8" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Outlines\n", - "\n", - "Outlines provide an out-of-the-box implementation for guided labeling with generative, however, in some cases [(zero-shot) classification models from the HuggingFace library](https://huggingface.co/models?pipeline_tag=text-classification&sort=trending) can be used to provide a good point for the providing suggestions during a labeling process too. Take a look at our [example with SetFit](labelling-feedback-setfit.ipynb)." - ] + "486c1a817552432c8fb20e59d0a3f079": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'positive'" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } + "501d213a24064f998d4d3c45255d02b7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "503d373bd18b4b79a1f694916734d903": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6e9e5e1ac58945d0926a85c1fd29ab17", + "IPY_MODEL_cc9ccdfefca941e1813258a19afe64ed", + "IPY_MODEL_c2238acd18b844c0bb517d670b76ca5c" ], - "source": [ - "def get_sentiment_from_outlines(text: Union[str, list]) -> str:\n", - " return generate.choice(transformer_model, sentiment)(text)\n", - "\n", - "get_sentiment_from_outlines(\"I love outlines and transformers for sentiment labelling.\")" - ] + "layout": "IPY_MODEL_90eec4e8ae8b42268548588db2fcbf49" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can use the `choice`-methods with a list of strings too." 
- ] + "5057f8b8144d41ff9d8b82b8602570fc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['neutral', 'positive']" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "outlines_sentiment = get_sentiment_from_outlines(outlines_reviews)\n", - "outlines_sentiment" - ] + "5692de58835a466695fcc8f0d5976b74": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `MultiLabelQuestion`\n", - "\n", - "For this step, we will re-use the generated reviews from `lanchain_reviews` and `outlines_reviews` and label their topics as part of a multi-label classification problem." - ] + "571fd48c2da8432e8a74e7b318eb6042": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Langchain\n", - "\n", - "Note that we are now using an output parser as a post-processing step for the returned output. We do this to ensure that we can obtain a `List[str]`. We will use the built-in `CommaSeparatedListOutputParser`, which split strings by comma and returns a list of strings as output. 
And we are using the jinja-like templating in a similar way as with the `SingleLabelQuestion`." - ] + "5a06b8d12b494daeb0624f2e39e06e67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['new_card', 'mortgage', 'application', 'payments']" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "output_parser = CommaSeparatedListOutputParser()\n", - "template = (\n", - " f'Classify the text as the following topics: {topic}. '\n", - " 'Return zero or more topics as a comma-separated list. If zero return an empty string. 
'\n", - " '{prompt}'\n", - ")\n", - "prompt = PromptTemplate(template=template, input_variables=[\"prompt\"], output_parser=output_parser)\n", - "llm_chain_topics = LLMChain(prompt=prompt, llm=openai_model, output_parser=output_parser)\n", - "\n", - "def get_topics_from_langchain(text: str) -> str:\n", - " return [topic.lower() for topic in llm_chain_topics.predict(prompt=text) if topic != '']\n", - "\n", - "get_topics_from_langchain(f\"I love extracting {topic} with and openai and langchain for topic labelling.\")" - ] + "5cc0f7cc30ae4aa4b13966a773e4c824": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We get the topic labels for the generated `langchain_reviews`." 
- ] + "5e0377b4b48c441a8d747ea904c3207b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['new_card'], ['mortgage', 'application']]" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } + "6143886f7acc4591ae5f79ce6f67af4a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"621bb7d632814cb0839755ca56098d7a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "668593b82ae54d3cbaf1a19c0307c545": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6e9e5e1ac58945d0926a85c1fd29ab17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_501d213a24064f998d4d3c45255d02b7", + "placeholder": "​", + "style": "IPY_MODEL_3d282336f5c3425386a417866f367007", + "value": "Generating Training Pairs: 100%" + } + }, + "70a57ad580f847d3bd3123cfe1539305": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "750011ef09534e55bab5180974bcf5d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "77bd2b1f5e57441ab729c6e517279834": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "783115bacdbf4c0bb09c0b1fc7976d28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_242f97eb0f0d4ab1830c62686127b717", + "IPY_MODEL_bfecbc09a4f84f3db51903d5048ff825", + "IPY_MODEL_db7cf4427ad746cd86df88f7a1016bc9" ], - "source": [ - 
"langchain_topics = [get_topics_from_langchain(review) for review in langchain_reviews]\n", - "langchain_topics" - ] + "layout": "IPY_MODEL_668593b82ae54d3cbaf1a19c0307c545" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Outlines\n", - "\n", - "Outlines does not have a direct way to generate data from choices but we are able to leverage their `Pydantic` integration to generate a `json` schema. Note that this is a hacky way to facilitate this guided generation and is not officially mentioned in [the paper behind the outlines package](https://arxiv.org/pdf/2307.09702.pdf). \n", - "\n", - "Additionally, the use of `json` requires `pydantic>=2`.\n", - "\n", - "```python\n", - "# DEMO CODE\n", - "class Topic(BaseModel):\n", - " new_card: bool = False\n", - " mortgage: bool = False\n", - " application: bool = False\n", - " payments: bool = False\n", - "\n", - "def get_topics_from_outlines(text: str) -> str:\n", - " topics = []\n", - " json_data = generate.json(transformer_model, Topic)(langchain_reviews[0])\n", - " for key, value in json_data.items():\n", - " if value:\n", - " topics.append(key)\n", - " return topics\n", - "\n", - "get_topics_from_outlines(f\"I love extracting {topic} with and outlines and transformers for topic labelling.\")\n", - "```" - ] + "7a12fbf5400a468fbdce4b2b2008eefc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create synthetic records" - ] + "7b96b0a21eba4ad5a4c12534940b3591": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"Now we have our synthetic data and predictions, we can use them to create Argilla records. We will create completely artificial records from the `text` for the `TextField` and we will assign the `sentiment` and `topics` as model suggestions for the `LabelQuestion` and `MultiLabelQuestion`, respectively. These suggestions will help the annotators to label the data faster and more accurately, but instead of using them as suggestions, you would also be able to apply them as [annotated responses](../../../practical_guides/create_update_dataset/suggestions_and_responses.md) directly." - ] + "7ca015b6798947d58d275de6181fe053": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For demo purposes, we will only create records with synthetic data obtained from `langchain`." 
- ] + "90eec4e8ae8b42268548588db2fcbf49": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FeedbackRecord(fields={'text': '\\n\\nI recently applied for a mortgage at this bank and the process was simple and straightforward. The customer service team was helpful and knowledgeable, and their rates were competitive. 
Overall, I had a good experience.'}, metadata={}, responses=[], suggestions=(SuggestionSchema(question_id=None, question_name='sentiment', type=None, score=None, value='positive', agent=None), SuggestionSchema(question_id=None, question_name='topics', type=None, score=None, value=['mortgage', 'application'], agent=None)), external_id=None)" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } + "980f36d72cfa403aad67e871aecba890": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f5e35991e6d849eca73282c9c359000a", + "placeholder": "​", + "style": "IPY_MODEL_5a06b8d12b494daeb0624f2e39e06e67", + "value": " 1540/1540 [01:28<00:00, 21.45it/s]" + } + }, + "9a7c8861a37b41eba191059546f5dd5d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9ce0af78a2241e697a22229db7840ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7a12fbf5400a468fbdce4b2b2008eefc", + "placeholder": "​", + "style": "IPY_MODEL_04150cf7e9a74a04aafa94d394553630", + "value": "Iteration: 100%" + } + }, + "ae6ffc6572b54c059196983da4ff2d79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9a7c8861a37b41eba191059546f5dd5d", + "max": 1540, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_217760080e494d2d9b0582910d121a28", + "value": 1540 + } + }, + "ae7fc579502e46f7861e402580586b28": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bc0c58d9d798437fb1d40277d8777777": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bfecbc09a4f84f3db51903d5048ff825": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_5cc0f7cc30ae4aa4b13966a773e4c824", + "max": 60, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_28c40914eac34bcba0c9eb4dac6b0032", + "value": 60 + } + }, + "c21e90a6dda643d8bd82abf4e346d45c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_170a2ee20ab64a9b86db65549a5d4063", + "IPY_MODEL_fd7c2acc4b1945feabe6715dd270cb72", + "IPY_MODEL_2f271b0778974646aaff691227336e91" ], - "source": [ - "def create_synthetic_record():\n", - " review = generate_langchain_review()\n", - " record = rg.FeedbackRecord(\n", - " fields={\n", - " \"text\": review,\n", - " }\n", - " )\n", - " sentiment = get_sentiment_from_langchain(review)\n", - " topics = get_topics_from_langchain(review)\n", - " record.update(suggestions=[\n", - " {\"question_name\": \"sentiment\", \"value\": sentiment}, \n", - " {\"question_name\": \"topics\", \"value\": topics}\n", - " ])\n", - " return record\n", - "\n", - "record = create_synthetic_record()\n", - "record" - ] + "layout": "IPY_MODEL_ef245777ac3d435e8715fc55b1d4824c" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will then add the synthetic `record` to the dataset, and upload the model data and dataset to the Argilla server" - ] + "c2238acd18b844c0bb517d670b76ca5c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1d58b40ad6a54c25bd451eda4e7d8069", + "placeholder": "​", + "style": "IPY_MODEL_5e0377b4b48c441a8d747ea904c3207b", + "value": " 20/20 [00:01<00:00, 10.96it/s]" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset.add_records([record])\n", - "remote_dataset = dataset.push_to_argilla(name=\"synthetic-data\", workspace=\"argilla\")" - ] + "c2eac9934f5b407c8e424ee2da9eea58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ca015b6798947d58d275de6181fe053", + "placeholder": "​", + "style": "IPY_MODEL_750011ef09534e55bab5180974bcf5d4", + "value": "Epoch: 100%" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "In this tutorial, we have covered how to create synthetic data using OpenAI and Lanchain, or Transformers and Outlines. We have highlighted some caveats to synthetic data generation when it comes to prompt engineering. 
Finally, we've shown how to use this synthesized data as input and suggestions for Argilla records.\n", - "\n", - "To learn more about LLMs, LangChain and OpenAI check out these links:\n", - "\n", - "- [Outlines](https://github.com/normal-computing/outlines)\n", - "- [LangChain](https://github.com/langchain-ai/langchain)" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "argilla", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "vscode": { - "interpreter": { - "hash": "2d98cb9bf90a932b5bf8e86e91214497eb0e38eb318595fbd6fbd5460fe92036" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "04150cf7e9a74a04aafa94d394553630": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0447a98b5dfe42c899273b9c37bdadad": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0c010df989eb497c810a6f960c6ea41b": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0d7acd8e1a394336aa146e2a442f672c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "16993356757e4ee5b7f8042d58c96e17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "16d42bc00dfe4467a1da86b1d2391d0d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "170a2ee20ab64a9b86db65549a5d4063": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0d7acd8e1a394336aa146e2a442f672c", - "placeholder": "​", - "style": "IPY_MODEL_3e6c2b50b3084d23b575585c288f087e", - 
"value": "Generating Training Pairs: 100%" - } - }, - "186f82d150994ac7914d0646fb5ff425": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1d58b40ad6a54c25bd451eda4e7d8069": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1ff157a9c8974b07ae97cb115c8d0188": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "217760080e494d2d9b0582910d121a28": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "242f97eb0f0d4ab1830c62686127b717": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, 
- "layout": "IPY_MODEL_5057f8b8144d41ff9d8b82b8602570fc", - "placeholder": "​", - "style": "IPY_MODEL_369bc409052a48f7ac2182715406abef", - "value": "Iteration: 100%" - } - }, - "255d62fb39454098ab3701753d8d67d6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_77bd2b1f5e57441ab729c6e517279834", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bc0c58d9d798437fb1d40277d8777777", - "value": 1 - } - }, - "25f9bca647f44645b85a644f03807095": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fa5df54e161e40dbbb21ed96c879444e", - "placeholder": "​", - "style": "IPY_MODEL_16993356757e4ee5b7f8042d58c96e17", - "value": " 1/1 [01:28<00:00, 88.63s/it]" - } - }, - "28c40914eac34bcba0c9eb4dac6b0032": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", 
- "bar_color": null, - "description_width": "" - } - }, - "2f271b0778974646aaff691227336e91": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_16d42bc00dfe4467a1da86b1d2391d0d", - "placeholder": "​", - "style": "IPY_MODEL_0447a98b5dfe42c899273b9c37bdadad", - "value": " 20/20 [00:00<00:00, 391.01it/s]" - } - }, - "369bc409052a48f7ac2182715406abef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "36b99521f8274a639abb90eb0040d6c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_70a57ad580f847d3bd3123cfe1539305", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0c010df989eb497c810a6f960c6ea41b", - "value": 1 - } - }, - "379907416f504f05906454e482da2eaf": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "38bfdddef0444c0baf9d29248689f846": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3f5aed26eeef4182b360085d83ae795d", - "IPY_MODEL_255d62fb39454098ab3701753d8d67d6", - "IPY_MODEL_25f9bca647f44645b85a644f03807095" - ], - "layout": "IPY_MODEL_ae7fc579502e46f7861e402580586b28" - } - }, - "3d282336f5c3425386a417866f367007": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3e622eeea5df47d6a21e015f3e742fa8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - 
"align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3e6c2b50b3084d23b575585c288f087e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3f5aed26eeef4182b360085d83ae795d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6143886f7acc4591ae5f79ce6f67af4a", - "placeholder": "​", - "style": "IPY_MODEL_486c1a817552432c8fb20e59d0a3f079", - "value": "Epoch: 100%" - } - }, - "3fd94ef662db4fff9dde61455b41faf1": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_186f82d150994ac7914d0646fb5ff425", - "placeholder": "​", - "style": "IPY_MODEL_379907416f504f05906454e482da2eaf", - "value": " 1/1 [00:02<00:00, 2.63s/it]" - } - }, - "411de4b297fe4a09acb70951c9f36b82": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c2eac9934f5b407c8e424ee2da9eea58", - "IPY_MODEL_36b99521f8274a639abb90eb0040d6c0", - "IPY_MODEL_3fd94ef662db4fff9dde61455b41faf1" - ], - "layout": "IPY_MODEL_d6283b2cf69d45f694633ae1544d47a8" - } - }, - "486c1a817552432c8fb20e59d0a3f079": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "501d213a24064f998d4d3c45255d02b7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": 
"1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "503d373bd18b4b79a1f694916734d903": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6e9e5e1ac58945d0926a85c1fd29ab17", - "IPY_MODEL_cc9ccdfefca941e1813258a19afe64ed", - "IPY_MODEL_c2238acd18b844c0bb517d670b76ca5c" - ], - "layout": "IPY_MODEL_90eec4e8ae8b42268548588db2fcbf49" - } - }, - "5057f8b8144d41ff9d8b82b8602570fc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5692de58835a466695fcc8f0d5976b74": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "571fd48c2da8432e8a74e7b318eb6042": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5a06b8d12b494daeb0624f2e39e06e67": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5cc0f7cc30ae4aa4b13966a773e4c824": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5e0377b4b48c441a8d747ea904c3207b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6143886f7acc4591ae5f79ce6f67af4a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - 
"overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "621bb7d632814cb0839755ca56098d7a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "668593b82ae54d3cbaf1a19c0307c545": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6e9e5e1ac58945d0926a85c1fd29ab17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - 
"_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_501d213a24064f998d4d3c45255d02b7", - "placeholder": "​", - "style": "IPY_MODEL_3d282336f5c3425386a417866f367007", - "value": "Generating Training Pairs: 100%" - } - }, - "70a57ad580f847d3bd3123cfe1539305": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "750011ef09534e55bab5180974bcf5d4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "77bd2b1f5e57441ab729c6e517279834": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "783115bacdbf4c0bb09c0b1fc7976d28": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - 
"IPY_MODEL_242f97eb0f0d4ab1830c62686127b717", - "IPY_MODEL_bfecbc09a4f84f3db51903d5048ff825", - "IPY_MODEL_db7cf4427ad746cd86df88f7a1016bc9" - ], - "layout": "IPY_MODEL_668593b82ae54d3cbaf1a19c0307c545" - } - }, - "7a12fbf5400a468fbdce4b2b2008eefc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7b96b0a21eba4ad5a4c12534940b3591": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": 
null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7ca015b6798947d58d275de6181fe053": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "90eec4e8ae8b42268548588db2fcbf49": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "980f36d72cfa403aad67e871aecba890": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f5e35991e6d849eca73282c9c359000a", - "placeholder": "​", - "style": "IPY_MODEL_5a06b8d12b494daeb0624f2e39e06e67", - "value": " 1540/1540 [01:28<00:00, 21.45it/s]" - } - }, - "9a7c8861a37b41eba191059546f5dd5d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", 
- "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a9ce0af78a2241e697a22229db7840ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7a12fbf5400a468fbdce4b2b2008eefc", - "placeholder": "​", - "style": "IPY_MODEL_04150cf7e9a74a04aafa94d394553630", - "value": "Iteration: 100%" - } - }, - "ae6ffc6572b54c059196983da4ff2d79": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9a7c8861a37b41eba191059546f5dd5d", - "max": 1540, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_217760080e494d2d9b0582910d121a28", - "value": 1540 - } - }, - "ae7fc579502e46f7861e402580586b28": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bc0c58d9d798437fb1d40277d8777777": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "bfecbc09a4f84f3db51903d5048ff825": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5cc0f7cc30ae4aa4b13966a773e4c824", - "max": 60, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_28c40914eac34bcba0c9eb4dac6b0032", - "value": 60 - } - }, - "c21e90a6dda643d8bd82abf4e346d45c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_170a2ee20ab64a9b86db65549a5d4063", - "IPY_MODEL_fd7c2acc4b1945feabe6715dd270cb72", - "IPY_MODEL_2f271b0778974646aaff691227336e91" - ], - "layout": "IPY_MODEL_ef245777ac3d435e8715fc55b1d4824c" - } - }, - "c2238acd18b844c0bb517d670b76ca5c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1d58b40ad6a54c25bd451eda4e7d8069", - "placeholder": "​", - "style": "IPY_MODEL_5e0377b4b48c441a8d747ea904c3207b", - "value": " 20/20 [00:01<00:00, 10.96it/s]" - } - }, - "c2eac9934f5b407c8e424ee2da9eea58": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7ca015b6798947d58d275de6181fe053", - "placeholder": "​", - "style": "IPY_MODEL_750011ef09534e55bab5180974bcf5d4", - "value": "Epoch: 100%" - } - }, - "cc9ccdfefca941e1813258a19afe64ed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7b96b0a21eba4ad5a4c12534940b3591", - "max": 20, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_571fd48c2da8432e8a74e7b318eb6042", - "value": 20 - } - }, - "d11aa6a0c8c54481b6cc2c80d1fa0ba1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": 
"1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_a9ce0af78a2241e697a22229db7840ab", - "IPY_MODEL_ae6ffc6572b54c059196983da4ff2d79", - "IPY_MODEL_980f36d72cfa403aad67e871aecba890" - ], - "layout": "IPY_MODEL_5692de58835a466695fcc8f0d5976b74" - } - }, - "d6283b2cf69d45f694633ae1544d47a8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "db7cf4427ad746cd86df88f7a1016bc9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3e622eeea5df47d6a21e015f3e742fa8", - "placeholder": "​", - "style": "IPY_MODEL_621bb7d632814cb0839755ca56098d7a", - "value": " 60/60 [00:02<00:00, 23.09it/s]" - } - }, - "ef245777ac3d435e8715fc55b1d4824c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f5e35991e6d849eca73282c9c359000a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", 
- "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fa5df54e161e40dbbb21ed96c879444e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": 
null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fd7c2acc4b1945feabe6715dd270cb72": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ff7f98b368c448ea81e4c79fded0be5a", - "max": 20, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1ff157a9c8974b07ae97cb115c8d0188", - "value": 20 - } - }, - "ff7f98b368c448ea81e4c79fded0be5a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } - } - } + "cc9ccdfefca941e1813258a19afe64ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7b96b0a21eba4ad5a4c12534940b3591", + "max": 20, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_571fd48c2da8432e8a74e7b318eb6042", + "value": 20 + } + }, + "d11aa6a0c8c54481b6cc2c80d1fa0ba1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a9ce0af78a2241e697a22229db7840ab", + "IPY_MODEL_ae6ffc6572b54c059196983da4ff2d79", + "IPY_MODEL_980f36d72cfa403aad67e871aecba890" + ], + "layout": "IPY_MODEL_5692de58835a466695fcc8f0d5976b74" + } + }, + "d6283b2cf69d45f694633ae1544d47a8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": 
null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "db7cf4427ad746cd86df88f7a1016bc9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e622eeea5df47d6a21e015f3e742fa8", + "placeholder": "​", + "style": "IPY_MODEL_621bb7d632814cb0839755ca56098d7a", + "value": " 60/60 [00:02<00:00, 23.09it/s]" + } + }, + "ef245777ac3d435e8715fc55b1d4824c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f5e35991e6d849eca73282c9c359000a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fa5df54e161e40dbbb21ed96c879444e": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd7c2acc4b1945feabe6715dd270cb72": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ff7f98b368c448ea81e4c79fded0be5a", + "max": 20, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1ff157a9c8974b07ae97cb115c8d0188", + "value": 20 + } + }, + "ff7f98b368c448ea81e4c79fded0be5a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-setfit.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-setfit.ipynb index 61dac6fdaf..de5dab622d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-setfit.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-feedback-setfit.ipynb @@ -1,2993 +1,2991 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ✨ Add zero-shot text classification suggestions using SetFit\n", - "\n", - "Suggestions are a wonderful way to make things easier and faster for your annotation team. 
These preselected options will make the labelling process more efficient, as they will only need to correct the suggestions. \n", - "\n", - "In this example, we will demonstrate how to implement a zero-shot approach using SetFit to get some initial suggestions for dataset that combines two text classification tasks that include a `LabelQuestion` and a `MultiLabelQuestion`.\n", - "\n", - "Let's get started!\n", - "\n", - "![Feedback Task dataset with suggestions made using SetFit](/_static/images/llms/labelling-feedback-setfit/snapshot_setfit_suggestions.png)\n", - "\n", - "
\n", - "\n", - "Note \n", - "\n", - "This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", - "\n", - "
\n" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ✨ Add zero-shot text classification suggestions using SetFit\n", + "\n", + "Suggestions are a wonderful way to make things easier and faster for your annotation team. These preselected options will make the labelling process more efficient, as they will only need to correct the suggestions. \n", + "\n", + "In this example, we will demonstrate how to implement a zero-shot approach using SetFit to get some initial suggestions for dataset that combines two text classification tasks that include a `LabelQuestion` and a `MultiLabelQuestion`.\n", + "\n", + "Let's get started!\n", + "\n", + "![Feedback Task dataset with suggestions made using SetFit](/_static/images/llms/labelling-feedback-setfit/snapshot_setfit_suggestions.png)\n", + "\n", + "
\n", + "\n", + "Note \n", + "\n", + "This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", + "\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. If you don't have one already, check out our [Quickstart](../../../getting_started/quickstart.md) or [Installation](../../../getting_started/quickstart_installation.ipynb) pages. Once you do, complete the following steps:\n", + "\n", + "1. Install the Argilla client and the required third-party libraries using `pip`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yN2atS0RE2pF" + }, + "outputs": [], + "source": [ + "!pip install argilla setfit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Let's make the necessary imports:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "POQgkfrWEg1u" + }, + "outputs": [], + "source": [ + "import argilla as rg\n", + "from datasets import load_dataset\n", + "from setfit import get_templated_dataset\n", + "from setfit import SetFitModel, SetFitTrainer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. 
If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# Replace api_key if you configured a custom API key\n", + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# import os\n", + "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"admin.apikey\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure the dataset" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we will load a popular open-source dataset that has customer requests in the banking domain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0UsoG5OtE11w" + }, + "outputs": [], + "source": [ + "data = load_dataset(\"PolyAI/banking77\", split=\"test\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will configure our dataset with two different questions so that we can work with two text classification tasks at the same time. In this case, we will load the original labels of this dataset to make a multi-label classification of the topics mentioned in the request and we will also set up a question to classify the sentiment of the request as either \"positive\", \"neutral\" or \"negative\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KKu2QplpFDgw" + }, + "outputs": [], + "source": [ + "dataset = rg.FeedbackDataset(\n", + " fields=[rg.TextField(name=\"text\")],\n", + " questions=[\n", + " rg.MultiLabelQuestion(\n", + " name=\"topics\",\n", + " title=\"Select the topic(s) of the request\",\n", + " labels=data.info.features[\n", + " \"label\"\n", + " ].names, # these are the original labels present in the dataset\n", + " visible_labels=10,\n", + " ),\n", + " rg.LabelQuestion(\n", + " name=\"sentiment\",\n", + " title=\"What is the sentiment of the message?\",\n", + " labels=[\"positive\", \"neutral\", \"negative\"],\n", + " ),\n", + " ],\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the models" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we will use the data we loaded and the labels and questions we configured for our dataset to train a zero-shot text classification model for each of the questions in our dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(question_name, template, multi_label=False):\n", + " # build a training dataset that uses the labels of a specific question in our Argilla dataset\n", + " train_dataset = get_templated_dataset(\n", + " candidate_labels=dataset.question_by_name(question_name).labels,\n", + " sample_size=8,\n", + " template=template,\n", + " multi_label=multi_label,\n", + " )\n", + "\n", + " # train a model using the training dataset we just built\n", + " if multi_label:\n", + " model = SetFitModel.from_pretrained(\n", + " \"all-MiniLM-L6-v2\", multi_target_strategy=\"one-vs-rest\"\n", + " )\n", + " else:\n", + " model = SetFitModel.from_pretrained(\"all-MiniLM-L6-v2\")\n", + "\n", + " trainer = SetFitTrainer(model=model, train_dataset=train_dataset)\n", + " trainer.train()\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 276, + "referenced_widgets": [ + "503d373bd18b4b79a1f694916734d903", + "6e9e5e1ac58945d0926a85c1fd29ab17", + "cc9ccdfefca941e1813258a19afe64ed", + "c2238acd18b844c0bb517d670b76ca5c", + "90eec4e8ae8b42268548588db2fcbf49", + "501d213a24064f998d4d3c45255d02b7", + "3d282336f5c3425386a417866f367007", + "7b96b0a21eba4ad5a4c12534940b3591", + "571fd48c2da8432e8a74e7b318eb6042", + "1d58b40ad6a54c25bd451eda4e7d8069", + "5e0377b4b48c441a8d747ea904c3207b", + "38bfdddef0444c0baf9d29248689f846", + "3f5aed26eeef4182b360085d83ae795d", + "255d62fb39454098ab3701753d8d67d6", + "25f9bca647f44645b85a644f03807095", + "ae7fc579502e46f7861e402580586b28", + "6143886f7acc4591ae5f79ce6f67af4a", + "486c1a817552432c8fb20e59d0a3f079", + "77bd2b1f5e57441ab729c6e517279834", + "bc0c58d9d798437fb1d40277d8777777", + "fa5df54e161e40dbbb21ed96c879444e", + "16993356757e4ee5b7f8042d58c96e17", + "d11aa6a0c8c54481b6cc2c80d1fa0ba1", + 
"a9ce0af78a2241e697a22229db7840ab", + "ae6ffc6572b54c059196983da4ff2d79", + "980f36d72cfa403aad67e871aecba890", + "5692de58835a466695fcc8f0d5976b74", + "7a12fbf5400a468fbdce4b2b2008eefc", + "04150cf7e9a74a04aafa94d394553630", + "9a7c8861a37b41eba191059546f5dd5d", + "217760080e494d2d9b0582910d121a28", + "f5e35991e6d849eca73282c9c359000a", + "5a06b8d12b494daeb0624f2e39e06e67" + ] }, + "id": "U9TVO355a2np", + "outputId": "7d6b6b60-6f49-4308-a2e6-ac24bf99bf72" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "For this tutorial, you will need to have an Argilla server running. If you don't have one already, check out our [Quickstart](../../../getting_started/quickstart.md) or [Installation](../../../getting_started/quickstart_installation.ipynb) pages. Once you do, complete the following steps:\n", - "\n", - "1. Install the Argilla client and the required third-party libraries using `pip`:" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "config.json not found in HuggingFace Hub.\n", + "WARNING:huggingface_hub.hub_mixin:config.json not found in HuggingFace Hub.\n", + "model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yN2atS0RE2pF" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "503d373bd18b4b79a1f694916734d903", + "version_major": 2, + "version_minor": 0 }, - "outputs": [], - "source": [ - "!pip install argilla setfit" + "text/plain": [ + "Generating Training Pairs: 0%| | 0/20 [00:00\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textlabeltopicssentiment
0How do I locate my card?11[{'label': 'activate_my_card', 'score': 0.0127...[{'label': 'positive', 'score': 0.348371499634...
1I still have not received my new card, I order...11[{'label': 'activate_my_card', 'score': 0.0133...[{'label': 'positive', 'score': 0.361745933281...
2I ordered a card but it has not arrived. Help ...11[{'label': 'activate_my_card', 'score': 0.0094...[{'label': 'positive', 'score': 0.346292075496...
3Is there a way to know when my card will arrive?11[{'label': 'activate_my_card', 'score': 0.0150...[{'label': 'positive', 'score': 0.426133716131...
4My card has not arrived yet.11[{'label': 'activate_my_card', 'score': 0.0175...[{'label': 'positive', 'score': 0.389241385165...
\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + "
\n", + " \n" + ], + "text/plain": [ + " text label \\\n", + "0 How do I locate my card? 11 \n", + "1 I still have not received my new card, I order... 11 \n", + "2 I ordered a card but it has not arrived. Help ... 11 \n", + "3 Is there a way to know when my card will arrive? 11 \n", + "4 My card has not arrived yet. 11 \n", + "\n", + " topics \\\n", + "0 [{'label': 'activate_my_card', 'score': 0.0127... \n", + "1 [{'label': 'activate_my_card', 'score': 0.0133... \n", + "2 [{'label': 'activate_my_card', 'score': 0.0094... \n", + "3 [{'label': 'activate_my_card', 'score': 0.0150... \n", + "4 [{'label': 'activate_my_card', 'score': 0.0175... \n", + "\n", + " sentiment \n", + "0 [{'label': 'positive', 'score': 0.348371499634... \n", + "1 [{'label': 'positive', 'score': 0.361745933281... \n", + "2 [{'label': 'positive', 'score': 0.346292075496... \n", + "3 [{'label': 'positive', 'score': 0.426133716131... \n", + "4 [{'label': 'positive', 'score': 0.389241385165... " ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.to_pandas().head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build records and push" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the data and the predictions we have produced, now we can build records that include the suggestions from our models. In the case of the `LabelQuestion` we will use the label that received the highest probability score and for the `MultiLabelQuestion` we will include all labels with a score above a certain threshold. In this case, we decided to go for `2/len(labels)`, but you can experiment with your data and decide to go for a more restrictive or more lenient threshold. \n", + "\n", + ".. 
hint:: Note that more lenient thresholds (closer or equal to `1/len(labels)`) will suggest more labels and restrictive thresholds (between 2 and 3) will select fewer (or no) labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def add_suggestions(record):\n", + " suggestions = []\n", + "\n", + " # get label with max score for sentiment question\n", + " sentiment = max(record[\"sentiment\"], key=lambda x: x[\"score\"])[\"label\"]\n", + " suggestions.append({\"question_name\": \"sentiment\", \"value\": sentiment})\n", + "\n", + " # get all labels above a threshold for topics questions\n", + " threshold = 2 / len(dataset.question_by_name(\"topics\").labels)\n", + " topics = [\n", + " label[\"label\"] for label in record[\"topics\"] if label[\"score\"] >= threshold\n", + " ]\n", + " # apply the suggestion only if at least one label was over the threshold\n", + " if topics:\n", + " suggestions.append({\"question_name\": \"topics\", \"value\": topics})\n", + " return suggestions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S0I4lkIWqmin" + }, + "outputs": [], + "source": [ + "records = [\n", + " rg.FeedbackRecord(\n", + " fields={\"text\": record[\"text\"]}, suggestions=add_suggestions(record)\n", + " )\n", + " for record in data\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once we are happy with the result, we can add the records to the dataset that we configured above, push it to Argilla and start annotating." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CvVgNhQSibLM" + }, + "outputs": [], + "source": [ + "dataset.add_records(records)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "l2pdzhuspBA_", + "outputId": "a296c87f-35a3-4476-8ed1-56e1f053a953" + }, + "outputs": [ { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this example, we will load a popular open-source dataset that has customer requests in the banking domain." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "Pushing records to Argilla...: 100%|██████████| 97/97 [00:21<00:00, 4.58it/s]\n" + ] + } + ], + "source": [ + "dataset.push_to_argilla(\"setfit_tutorial\", workspace=\"admin\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is how the UI will look like with the suggestions from our models:\n", + "![Feedback Task dataset with suggestions made using SetFit](/_static/images/llms/labelling-feedback-setfit/snapshot_setfit_suggestions.png)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In this tutorial, we have covered how to add suggestions to a Feedback Task dataset using a zero-shot approach with the SetFit library. 
This will help with the efficiency of the labelling process by lowering the number of decisions and edits that the annotation team must make.\n", + "\n", + "To learn more about SetFit check out these links:\n", + "\n", + "- [More SetFit + Argilla tutorials](../../../tutorials/libraries/setfit.md)\n", + "- [SetFit repo on GitHub](https://github.com/huggingface/setfit)\n", + "- [SetFit documentation](https://huggingface.co/docs/setfit/index)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "argilla", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.15" + }, + "vscode": { + "interpreter": { + "hash": "2d98cb9bf90a932b5bf8e86e91214497eb0e38eb318595fbd6fbd5460fe92036" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "04150cf7e9a74a04aafa94d394553630": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0UsoG5OtE11w" - }, - "outputs": [], - "source": [ - "data = load_dataset(\"PolyAI/banking77\", split=\"test\")" - ] + "0447a98b5dfe42c899273b9c37bdadad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will configure our dataset with two different questions so that we can work with two text classification tasks at the same time. In this case, we will load the original labels of this dataset to make a multi-label classification of the topics mentioned in the request and we will also set up a question to classify the sentiment of the request as either \"positive\", \"neutral\" or \"negative\"." - ] + "0c010df989eb497c810a6f960c6ea41b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KKu2QplpFDgw" - }, - "outputs": [], - "source": [ - "dataset = rg.FeedbackDataset(\n", - " fields = [rg.TextField(name=\"text\")],\n", - " questions = [\n", - " rg.MultiLabelQuestion(\n", - " name=\"topics\",\n", - " title=\"Select the topic(s) of the request\",\n", - " labels=data.info.features['label'].names, #these are the original labels present in the dataset\n", - " visible_labels=10\n", - " ),\n", - " rg.LabelQuestion(\n", - " name=\"sentiment\",\n", - " title=\"What is the sentiment of the message?\",\n", - " labels=[\"positive\", \"neutral\", \"negative\"]\n", - " )\n", - " ]\n", - ")" - ] + "0d7acd8e1a394336aa146e2a442f672c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train the models" - ] + "16993356757e4ee5b7f8042d58c96e17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we will use the data we loaded and the labels and questions we configured for our dataset to train a zero-shot text classification model for each of the questions in our dataset." 
- ] + "16d42bc00dfe4467a1da86b1d2391d0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def train_model(question_name, template, multi_label=False):\n", - " # build a training dataset that uses the labels of a specific question in our Argilla dataset\n", - " train_dataset = get_templated_dataset(\n", - " candidate_labels=dataset.question_by_name(question_name).labels,\n", - " sample_size=8,\n", - " template=template,\n", - " multi_label=multi_label\n", - " )\n", - "\n", - " # train a model using the training dataset we just built\n", - " if multi_label:\n", - " model = SetFitModel.from_pretrained(\n", - " \"all-MiniLM-L6-v2\",\n", - " multi_target_strategy=\"one-vs-rest\"\n", - " )\n", - " else:\n", - " model = 
SetFitModel.from_pretrained(\n", - " \"all-MiniLM-L6-v2\"\n", - " )\n", - "\n", - " trainer = SetFitTrainer(\n", - " model=model,\n", - " train_dataset=train_dataset\n", - " )\n", - " trainer.train()\n", - " return model" - ] + "170a2ee20ab64a9b86db65549a5d4063": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0d7acd8e1a394336aa146e2a442f672c", + "placeholder": "​", + "style": "IPY_MODEL_3e6c2b50b3084d23b575585c288f087e", + "value": "Generating Training Pairs: 100%" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 276, - "referenced_widgets": [ - "503d373bd18b4b79a1f694916734d903", - "6e9e5e1ac58945d0926a85c1fd29ab17", - "cc9ccdfefca941e1813258a19afe64ed", - "c2238acd18b844c0bb517d670b76ca5c", - "90eec4e8ae8b42268548588db2fcbf49", - "501d213a24064f998d4d3c45255d02b7", - "3d282336f5c3425386a417866f367007", - "7b96b0a21eba4ad5a4c12534940b3591", - "571fd48c2da8432e8a74e7b318eb6042", - "1d58b40ad6a54c25bd451eda4e7d8069", - "5e0377b4b48c441a8d747ea904c3207b", - "38bfdddef0444c0baf9d29248689f846", - "3f5aed26eeef4182b360085d83ae795d", - "255d62fb39454098ab3701753d8d67d6", - "25f9bca647f44645b85a644f03807095", - "ae7fc579502e46f7861e402580586b28", - "6143886f7acc4591ae5f79ce6f67af4a", - "486c1a817552432c8fb20e59d0a3f079", - "77bd2b1f5e57441ab729c6e517279834", - "bc0c58d9d798437fb1d40277d8777777", - "fa5df54e161e40dbbb21ed96c879444e", - "16993356757e4ee5b7f8042d58c96e17", - "d11aa6a0c8c54481b6cc2c80d1fa0ba1", - "a9ce0af78a2241e697a22229db7840ab", - 
"ae6ffc6572b54c059196983da4ff2d79", - "980f36d72cfa403aad67e871aecba890", - "5692de58835a466695fcc8f0d5976b74", - "7a12fbf5400a468fbdce4b2b2008eefc", - "04150cf7e9a74a04aafa94d394553630", - "9a7c8861a37b41eba191059546f5dd5d", - "217760080e494d2d9b0582910d121a28", - "f5e35991e6d849eca73282c9c359000a", - "5a06b8d12b494daeb0624f2e39e06e67" - ] - }, - "id": "U9TVO355a2np", - "outputId": "7d6b6b60-6f49-4308-a2e6-ac24bf99bf72" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "config.json not found in HuggingFace Hub.\n", - "WARNING:huggingface_hub.hub_mixin:config.json not found in HuggingFace Hub.\n", - "model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "503d373bd18b4b79a1f694916734d903", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Generating Training Pairs: 0%| | 0/20 [00:00\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
textlabeltopicssentiment
0How do I locate my card?11[{'label': 'activate_my_card', 'score': 0.0127...[{'label': 'positive', 'score': 0.348371499634...
1I still have not received my new card, I order...11[{'label': 'activate_my_card', 'score': 0.0133...[{'label': 'positive', 'score': 0.361745933281...
2I ordered a card but it has not arrived. Help ...11[{'label': 'activate_my_card', 'score': 0.0094...[{'label': 'positive', 'score': 0.346292075496...
3Is there a way to know when my card will arrive?11[{'label': 'activate_my_card', 'score': 0.0150...[{'label': 'positive', 'score': 0.426133716131...
4My card has not arrived yet.11[{'label': 'activate_my_card', 'score': 0.0175...[{'label': 'positive', 'score': 0.389241385165...
\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - "
\n", - " \n", - "
\n", - "\n", - "\n", - "\n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - "
\n", - " \n" - ], - "text/plain": [ - " text label \\\n", - "0 How do I locate my card? 11 \n", - "1 I still have not received my new card, I order... 11 \n", - "2 I ordered a card but it has not arrived. Help ... 11 \n", - "3 Is there a way to know when my card will arrive? 11 \n", - "4 My card has not arrived yet. 11 \n", - "\n", - " topics \\\n", - "0 [{'label': 'activate_my_card', 'score': 0.0127... \n", - "1 [{'label': 'activate_my_card', 'score': 0.0133... \n", - "2 [{'label': 'activate_my_card', 'score': 0.0094... \n", - "3 [{'label': 'activate_my_card', 'score': 0.0150... \n", - "4 [{'label': 'activate_my_card', 'score': 0.0175... \n", - "\n", - " sentiment \n", - "0 [{'label': 'positive', 'score': 0.348371499634... \n", - "1 [{'label': 'positive', 'score': 0.361745933281... \n", - "2 [{'label': 'positive', 'score': 0.346292075496... \n", - "3 [{'label': 'positive', 'score': 0.426133716131... \n", - "4 [{'label': 'positive', 'score': 0.389241385165... " - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } + "5692de58835a466695fcc8f0d5976b74": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "571fd48c2da8432e8a74e7b318eb6042": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5a06b8d12b494daeb0624f2e39e06e67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5cc0f7cc30ae4aa4b13966a773e4c824": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5e0377b4b48c441a8d747ea904c3207b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6143886f7acc4591ae5f79ce6f67af4a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "621bb7d632814cb0839755ca56098d7a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "668593b82ae54d3cbaf1a19c0307c545": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6e9e5e1ac58945d0926a85c1fd29ab17": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_501d213a24064f998d4d3c45255d02b7", + "placeholder": "​", + "style": "IPY_MODEL_3d282336f5c3425386a417866f367007", + "value": "Generating Training Pairs: 100%" + } + }, + "70a57ad580f847d3bd3123cfe1539305": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "750011ef09534e55bab5180974bcf5d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "77bd2b1f5e57441ab729c6e517279834": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "783115bacdbf4c0bb09c0b1fc7976d28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_242f97eb0f0d4ab1830c62686127b717", + "IPY_MODEL_bfecbc09a4f84f3db51903d5048ff825", + "IPY_MODEL_db7cf4427ad746cd86df88f7a1016bc9" ], - "source": [ - "data.to_pandas().head()" - ] + "layout": "IPY_MODEL_668593b82ae54d3cbaf1a19c0307c545" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Build records and push" - ] + "7a12fbf5400a468fbdce4b2b2008eefc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With the data and the predictions we have produced, now we can build records that include the suggestions from our models. 
In the case of the `LabelQuestion` we will use the label that received the highest probability score and for the `MultiLabelQuestion` we will include all labels with a score above a certain threshold. In this case, we decided to go for `2/len(labels)`, but you can experiment with your data and decide to go for a more restrictive or more lenient threshold. \n", - "\n", - ".. hint:: Note that more lenient thresholds (closer or equal to `1/len(labels)`) will suggest more labels and restrictive thresholds (between 2 and 3) will select fewer (or no) labels." - ] + "7b96b0a21eba4ad5a4c12534940b3591": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def add_suggestions(record):\n", - " suggestions = []\n", - " \n", - " # get label with 
max score for sentiment question\n", - " sentiment = max(record['sentiment'], key=lambda x: x['score'])['label']\n", - " suggestions.append({\"question_name\": \"sentiment\", \"value\": sentiment})\n", - "\n", - " # get all labels above a threshold for topics questions\n", - " threshold = 2 / len(dataset.question_by_name(\"topics\").labels)\n", - " topics = [label['label'] for label in record['topics'] if label['score'] >= threshold]\n", - " # apply the suggestion only if at least one label was over the threshold\n", - " if topics:\n", - " suggestions.append({\"question_name\": \"topics\", \"value\": topics})\n", - " return suggestions" - ] + "7ca015b6798947d58d275de6181fe053": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S0I4lkIWqmin" - }, - 
"outputs": [], - "source": [ - "records = [\n", - " rg.FeedbackRecord(fields={\"text\": record['text']}, suggestions=add_suggestions(record))\n", - " for record in data\n", - "]" - ] + "90eec4e8ae8b42268548588db2fcbf49": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once we are happy with the result, we can add the records to the dataset that we configured above, push it to Argilla and start annotating." 
- ] + "980f36d72cfa403aad67e871aecba890": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f5e35991e6d849eca73282c9c359000a", + "placeholder": "​", + "style": "IPY_MODEL_5a06b8d12b494daeb0624f2e39e06e67", + "value": " 1540/1540 [01:28<00:00, 21.45it/s]" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CvVgNhQSibLM" - }, - "outputs": [], - "source": [ - "dataset.add_records(records)" - ] + "9a7c8861a37b41eba191059546f5dd5d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + 
"padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "l2pdzhuspBA_", - "outputId": "a296c87f-35a3-4476-8ed1-56e1f053a953" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Pushing records to Argilla...: 100%|██████████| 97/97 [00:21<00:00, 4.58it/s]\n" - ] - } + "a9ce0af78a2241e697a22229db7840ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7a12fbf5400a468fbdce4b2b2008eefc", + "placeholder": "​", + "style": "IPY_MODEL_04150cf7e9a74a04aafa94d394553630", + "value": "Iteration: 100%" + } + }, + "ae6ffc6572b54c059196983da4ff2d79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9a7c8861a37b41eba191059546f5dd5d", + "max": 1540, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_217760080e494d2d9b0582910d121a28", + "value": 1540 + } + }, + "ae7fc579502e46f7861e402580586b28": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bc0c58d9d798437fb1d40277d8777777": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bfecbc09a4f84f3db51903d5048ff825": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5cc0f7cc30ae4aa4b13966a773e4c824", + "max": 60, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_28c40914eac34bcba0c9eb4dac6b0032", + "value": 60 + } + }, + "c21e90a6dda643d8bd82abf4e346d45c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_170a2ee20ab64a9b86db65549a5d4063", + "IPY_MODEL_fd7c2acc4b1945feabe6715dd270cb72", + "IPY_MODEL_2f271b0778974646aaff691227336e91" ], - "source": [ - "dataset.push_to_argilla(\"setfit_tutorial\", workspace=\"admin\")" - ] + "layout": "IPY_MODEL_ef245777ac3d435e8715fc55b1d4824c" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is how the UI will look like with the suggestions from our models:\n", - "![Feedback Task dataset with suggestions made using SetFit](/_static/images/llms/labelling-feedback-setfit/snapshot_setfit_suggestions.png)" - ] + "c2238acd18b844c0bb517d670b76ca5c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1d58b40ad6a54c25bd451eda4e7d8069", + "placeholder": "​", + "style": 
"IPY_MODEL_5e0377b4b48c441a8d747ea904c3207b", + "value": " 20/20 [00:01<00:00, 10.96it/s]" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "In this tutorial, we have covered how to add suggestions to a Feedback Task dataset using a zero-shot approach with the SetFit library. This will help with the efficiency of the labelling process by lowering the number of decisions and edits that the annotation team must make.\n", - "\n", - "To learn more about SetFit check out these links:\n", - "\n", - "- [More SetFit + Argilla tutorials](../../../tutorials/libraries/setfit.md)\n", - "- [SetFit repo on GitHub](https://github.com/huggingface/setfit)\n", - "- [SetFit documentation](https://huggingface.co/docs/setfit/index)" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "argilla", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.8.15" - }, - "vscode": { - "interpreter": { - "hash": "2d98cb9bf90a932b5bf8e86e91214497eb0e38eb318595fbd6fbd5460fe92036" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "04150cf7e9a74a04aafa94d394553630": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0447a98b5dfe42c899273b9c37bdadad": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0c010df989eb497c810a6f960c6ea41b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0d7acd8e1a394336aa146e2a442f672c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "16993356757e4ee5b7f8042d58c96e17": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "16d42bc00dfe4467a1da86b1d2391d0d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "170a2ee20ab64a9b86db65549a5d4063": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0d7acd8e1a394336aa146e2a442f672c", - "placeholder": "​", - "style": "IPY_MODEL_3e6c2b50b3084d23b575585c288f087e", - "value": "Generating Training Pairs: 100%" - } - }, - "186f82d150994ac7914d0646fb5ff425": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1d58b40ad6a54c25bd451eda4e7d8069": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1ff157a9c8974b07ae97cb115c8d0188": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "217760080e494d2d9b0582910d121a28": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "242f97eb0f0d4ab1830c62686127b717": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5057f8b8144d41ff9d8b82b8602570fc", - "placeholder": "​", - "style": "IPY_MODEL_369bc409052a48f7ac2182715406abef", - "value": "Iteration: 100%" - } - }, - "255d62fb39454098ab3701753d8d67d6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_77bd2b1f5e57441ab729c6e517279834", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bc0c58d9d798437fb1d40277d8777777", - "value": 1 - } - }, - "25f9bca647f44645b85a644f03807095": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fa5df54e161e40dbbb21ed96c879444e", - "placeholder": "​", - "style": "IPY_MODEL_16993356757e4ee5b7f8042d58c96e17", - "value": " 1/1 [01:28<00:00, 88.63s/it]" - } - }, - "28c40914eac34bcba0c9eb4dac6b0032": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - 
"model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2f271b0778974646aaff691227336e91": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_16d42bc00dfe4467a1da86b1d2391d0d", - "placeholder": "​", - "style": "IPY_MODEL_0447a98b5dfe42c899273b9c37bdadad", - "value": " 20/20 [00:00<00:00, 391.01it/s]" - } - }, - "369bc409052a48f7ac2182715406abef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "36b99521f8274a639abb90eb0040d6c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": 
"", - "description_tooltip": null, - "layout": "IPY_MODEL_70a57ad580f847d3bd3123cfe1539305", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0c010df989eb497c810a6f960c6ea41b", - "value": 1 - } - }, - "379907416f504f05906454e482da2eaf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "38bfdddef0444c0baf9d29248689f846": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3f5aed26eeef4182b360085d83ae795d", - "IPY_MODEL_255d62fb39454098ab3701753d8d67d6", - "IPY_MODEL_25f9bca647f44645b85a644f03807095" - ], - "layout": "IPY_MODEL_ae7fc579502e46f7861e402580586b28" - } - }, - "3d282336f5c3425386a417866f367007": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3e622eeea5df47d6a21e015f3e742fa8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - 
"state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3e6c2b50b3084d23b575585c288f087e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3f5aed26eeef4182b360085d83ae795d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6143886f7acc4591ae5f79ce6f67af4a", - "placeholder": "​", - "style": "IPY_MODEL_486c1a817552432c8fb20e59d0a3f079", - "value": "Epoch: 100%" - } - }, - "3fd94ef662db4fff9dde61455b41faf1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_186f82d150994ac7914d0646fb5ff425", - "placeholder": "​", - "style": "IPY_MODEL_379907416f504f05906454e482da2eaf", - "value": " 1/1 [00:02<00:00, 2.63s/it]" - } - }, - "411de4b297fe4a09acb70951c9f36b82": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c2eac9934f5b407c8e424ee2da9eea58", - "IPY_MODEL_36b99521f8274a639abb90eb0040d6c0", - "IPY_MODEL_3fd94ef662db4fff9dde61455b41faf1" - ], - "layout": "IPY_MODEL_d6283b2cf69d45f694633ae1544d47a8" - } - }, - "486c1a817552432c8fb20e59d0a3f079": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "description_width": "" - } - }, - "501d213a24064f998d4d3c45255d02b7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "503d373bd18b4b79a1f694916734d903": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6e9e5e1ac58945d0926a85c1fd29ab17", - "IPY_MODEL_cc9ccdfefca941e1813258a19afe64ed", - "IPY_MODEL_c2238acd18b844c0bb517d670b76ca5c" - ], - "layout": "IPY_MODEL_90eec4e8ae8b42268548588db2fcbf49" - } - }, - 
"5057f8b8144d41ff9d8b82b8602570fc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5692de58835a466695fcc8f0d5976b74": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": 
null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "571fd48c2da8432e8a74e7b318eb6042": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5a06b8d12b494daeb0624f2e39e06e67": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5cc0f7cc30ae4aa4b13966a773e4c824": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": 
null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5e0377b4b48c441a8d747ea904c3207b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6143886f7acc4591ae5f79ce6f67af4a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "621bb7d632814cb0839755ca56098d7a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "668593b82ae54d3cbaf1a19c0307c545": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - 
"overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6e9e5e1ac58945d0926a85c1fd29ab17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_501d213a24064f998d4d3c45255d02b7", - "placeholder": "​", - "style": "IPY_MODEL_3d282336f5c3425386a417866f367007", - "value": "Generating Training Pairs: 100%" - } - }, - "70a57ad580f847d3bd3123cfe1539305": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "750011ef09534e55bab5180974bcf5d4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "77bd2b1f5e57441ab729c6e517279834": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "783115bacdbf4c0bb09c0b1fc7976d28": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_242f97eb0f0d4ab1830c62686127b717", - "IPY_MODEL_bfecbc09a4f84f3db51903d5048ff825", - "IPY_MODEL_db7cf4427ad746cd86df88f7a1016bc9" - ], - "layout": "IPY_MODEL_668593b82ae54d3cbaf1a19c0307c545" - } - }, - "7a12fbf5400a468fbdce4b2b2008eefc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7b96b0a21eba4ad5a4c12534940b3591": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7ca015b6798947d58d275de6181fe053": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "90eec4e8ae8b42268548588db2fcbf49": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "980f36d72cfa403aad67e871aecba890": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f5e35991e6d849eca73282c9c359000a", - 
"placeholder": "​", - "style": "IPY_MODEL_5a06b8d12b494daeb0624f2e39e06e67", - "value": " 1540/1540 [01:28<00:00, 21.45it/s]" - } - }, - "9a7c8861a37b41eba191059546f5dd5d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a9ce0af78a2241e697a22229db7840ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7a12fbf5400a468fbdce4b2b2008eefc", - "placeholder": "​", - "style": 
"IPY_MODEL_04150cf7e9a74a04aafa94d394553630", - "value": "Iteration: 100%" - } - }, - "ae6ffc6572b54c059196983da4ff2d79": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9a7c8861a37b41eba191059546f5dd5d", - "max": 1540, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_217760080e494d2d9b0582910d121a28", - "value": 1540 - } - }, - "ae7fc579502e46f7861e402580586b28": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } - }, - "bc0c58d9d798437fb1d40277d8777777": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "bfecbc09a4f84f3db51903d5048ff825": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5cc0f7cc30ae4aa4b13966a773e4c824", - "max": 60, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_28c40914eac34bcba0c9eb4dac6b0032", - "value": 60 - } - }, - "c21e90a6dda643d8bd82abf4e346d45c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_170a2ee20ab64a9b86db65549a5d4063", - "IPY_MODEL_fd7c2acc4b1945feabe6715dd270cb72", - "IPY_MODEL_2f271b0778974646aaff691227336e91" - ], - "layout": "IPY_MODEL_ef245777ac3d435e8715fc55b1d4824c" - } - }, - "c2238acd18b844c0bb517d670b76ca5c": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1d58b40ad6a54c25bd451eda4e7d8069", - "placeholder": "​", - "style": "IPY_MODEL_5e0377b4b48c441a8d747ea904c3207b", - "value": " 20/20 [00:01<00:00, 10.96it/s]" - } - }, - "c2eac9934f5b407c8e424ee2da9eea58": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7ca015b6798947d58d275de6181fe053", - "placeholder": "​", - "style": "IPY_MODEL_750011ef09534e55bab5180974bcf5d4", - "value": "Epoch: 100%" - } - }, - "cc9ccdfefca941e1813258a19afe64ed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7b96b0a21eba4ad5a4c12534940b3591", - "max": 20, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_571fd48c2da8432e8a74e7b318eb6042", - 
"value": 20 - } - }, - "d11aa6a0c8c54481b6cc2c80d1fa0ba1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_a9ce0af78a2241e697a22229db7840ab", - "IPY_MODEL_ae6ffc6572b54c059196983da4ff2d79", - "IPY_MODEL_980f36d72cfa403aad67e871aecba890" - ], - "layout": "IPY_MODEL_5692de58835a466695fcc8f0d5976b74" - } - }, - "d6283b2cf69d45f694633ae1544d47a8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "db7cf4427ad746cd86df88f7a1016bc9": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3e622eeea5df47d6a21e015f3e742fa8", - "placeholder": "​", - "style": "IPY_MODEL_621bb7d632814cb0839755ca56098d7a", - "value": " 60/60 [00:02<00:00, 23.09it/s]" - } - }, - "ef245777ac3d435e8715fc55b1d4824c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f5e35991e6d849eca73282c9c359000a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - 
"model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fa5df54e161e40dbbb21ed96c879444e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - 
"justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fd7c2acc4b1945feabe6715dd270cb72": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ff7f98b368c448ea81e4c79fded0be5a", - "max": 20, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1ff157a9c8974b07ae97cb115c8d0188", - "value": 20 - } - }, - "ff7f98b368c448ea81e4c79fded0be5a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - 
"left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } + "c2eac9934f5b407c8e424ee2da9eea58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ca015b6798947d58d275de6181fe053", + "placeholder": "​", + "style": "IPY_MODEL_750011ef09534e55bab5180974bcf5d4", + "value": "Epoch: 100%" + } + }, + "cc9ccdfefca941e1813258a19afe64ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7b96b0a21eba4ad5a4c12534940b3591", + "max": 20, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_571fd48c2da8432e8a74e7b318eb6042", + "value": 20 + } + }, + "d11aa6a0c8c54481b6cc2c80d1fa0ba1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a9ce0af78a2241e697a22229db7840ab", + "IPY_MODEL_ae6ffc6572b54c059196983da4ff2d79", + "IPY_MODEL_980f36d72cfa403aad67e871aecba890" + ], + "layout": "IPY_MODEL_5692de58835a466695fcc8f0d5976b74" + } + }, + "d6283b2cf69d45f694633ae1544d47a8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "db7cf4427ad746cd86df88f7a1016bc9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e622eeea5df47d6a21e015f3e742fa8", + "placeholder": "​", + "style": "IPY_MODEL_621bb7d632814cb0839755ca56098d7a", + "value": " 60/60 [00:02<00:00, 23.09it/s]" + } + }, + "ef245777ac3d435e8715fc55b1d4824c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f5e35991e6d849eca73282c9c359000a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fa5df54e161e40dbbb21ed96c879444e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd7c2acc4b1945feabe6715dd270cb72": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ff7f98b368c448ea81e4c79fded0be5a", + "max": 20, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1ff157a9c8974b07ae97cb115c8d0188", + "value": 20 + } + }, + "ff7f98b368c448ea81e4c79fded0be5a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + 
"top": null, + "visibility": null, + "width": null + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-spacy-llm.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-spacy-llm.ipynb index 85f99da90e..2db47be53d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-spacy-llm.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/labelling-spacy-llm.ipynb @@ -108,10 +108,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\",\n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { @@ -134,7 +131,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -157,9 +154,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -247,8 +247,8 @@ "config = configparser.ConfigParser()\n", "config.read_string(config_string)\n", "\n", - "with open(\"config.cfg\", 'w') as configfile:\n", - " config.write(configfile)" + "with open(\"config.cfg\", \"w\") as configfile:\n", + " config.write(configfile)" ] }, { @@ -299,20 +299,21 @@ "metadata": {}, "outputs": [], "source": [ - "#returns the category with the highest score\n", + "# returns the category with the highest score\n", "def get_textcat_suggestion(doc):\n", - " model_prediction = doc.cats\n", - " return max(model_prediction, key=model_prediction.get)\n", + " model_prediction = doc.cats\n", + " return max(model_prediction, key=model_prediction.get)\n", + "\n", "\n", - "#selects the top N sentences with the highest scores and return combined string\n", + "# selects the top N sentences with the highest scores and return combined string\n", "def get_summarization_suggestion(doc):\n", - " sentence_scores = Counter()\n", - " for sentence in doc.sents:\n", - " for word in sentence:\n", - " sentence_scores[sentence] += 1\n", - " summary_sentences = nlargest(2, sentence_scores, key=sentence_scores.get)\n", - " summary = ' '.join(str(sentence) for sentence in summary_sentences)\n", - " return summary" + " sentence_scores = Counter()\n", + " for sentence in doc.sents:\n", + " for word in sentence:\n", + " sentence_scores[sentence] += 1\n", + " summary_sentences = nlargest(2, sentence_scores, key=sentence_scores.get)\n", + " summary = \" \".join(str(sentence) for sentence in summary_sentences)\n", + " return summary" ] }, { @@ -354,21 +355,16 @@ "outputs": [], "source": [ "dataset = rg.FeedbackDataset(\n", - " fields=[\n", - " rg.TextField(name=\"text\")\n", - " ],\n", + " fields=[rg.TextField(name=\"text\")],\n", " questions=[\n", " rg.LabelQuestion(\n", " name=\"label-question\",\n", " title=\"Classify the text category.\",\n", - " #make sure that the labels are in line with the labels we have defined in 
config.cfg\n", - " labels=[\"HISTORY\",\"MUSIC\",\"TECHNOLOGY\",\"SCIENCE\",\"SPORTS\",\"POLITICS\"]\n", + " # make sure that the labels are in line with the labels we have defined in config.cfg\n", + " labels=[\"HISTORY\", \"MUSIC\", \"TECHNOLOGY\", \"SCIENCE\", \"SPORTS\", \"POLITICS\"],\n", " ),\n", - " rg.TextQuestion(\n", - " name=\"text-question\",\n", - " title=\"Provide a summary for the text.\"\n", - " )\n", - " ]\n", + " rg.TextQuestion(name=\"text-question\", title=\"Provide a summary for the text.\"),\n", + " ],\n", ")" ] }, @@ -387,16 +383,16 @@ "source": [ "records = [\n", " rg.FeedbackRecord(\n", - " fields={\n", - " \"text\": doc.text\n", - " },\n", + " fields={\"text\": doc.text},\n", " suggestions=[\n", - " {\"question_name\": \"label-question\",\n", - " \"value\": get_textcat_suggestion(doc)},\n", - " {\"question_name\":\"text-question\",\n", - " \"value\": get_summarization_suggestion(doc)}\n", - " ]\n", - " ) for doc in [nlp(item) for item in dataset_hf[\"context\"]]\n", + " {\"question_name\": \"label-question\", \"value\": get_textcat_suggestion(doc)},\n", + " {\n", + " \"question_name\": \"text-question\",\n", + " \"value\": get_summarization_suggestion(doc),\n", + " },\n", + " ],\n", + " )\n", + " for doc in [nlp(item) for item in dataset_hf[\"context\"]]\n", "]" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/making-most-of-markdown.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/making-most-of-markdown.ipynb index 253f77eeee..3c83033fdd 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/making-most-of-markdown.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/making-most-of-markdown.ipynb @@ -89,7 +89,12 @@ "outputs": [], "source": [ "import argilla as rg\n", - "from argilla.client.feedback.utils import audio_to_html, image_to_html, video_to_html, pdf_to_html\n", + "from argilla.client.feedback.utils import (\n", + " audio_to_html,\n", + " 
image_to_html,\n", + " video_to_html,\n", + " pdf_to_html,\n", + ")\n", "\n", "import re\n", "import os\n", @@ -124,11 +129,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -151,7 +152,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -187,11 +188,11 @@ "outputs": [], "source": [ "# Load the custom pipeline\n", - "nlp = spacy.load(\n", - " \"en_core_web_sm\", \n", - " exclude=[\"ner\"]\n", - ")\n", - "nlp.add_pipe(\"span_marker\", config={\"model\": \"tomaarsen/span-marker-bert-tiny-fewnerd-coarse-super\"})" + "nlp = spacy.load(\"en_core_web_sm\", exclude=[\"ner\"])\n", + "nlp.add_pipe(\n", + " \"span_marker\",\n", + " config={\"model\": \"tomaarsen/span-marker-bert-tiny-fewnerd-coarse-super\"},\n", + ")" ] }, { @@ -427,16 +428,34 @@ "# Create the FeedbackDataset configuration\n", "dataset_spacy = rg.FeedbackDataset(\n", " fields=[\n", - " rg.TextField(name=\"text\", required= True, use_markdown=True),\n", - " rg.TextField(name=\"dependency-tree\", required= True, use_markdown=True),\n", - " rg.TextField(name=\"entities\", required= True, use_markdown=True)\n", + " rg.TextField(name=\"text\", required=True, use_markdown=True),\n", + " rg.TextField(name=\"dependency-tree\", required=True, use_markdown=True),\n", + " 
rg.TextField(name=\"entities\", required=True, use_markdown=True),\n", " ],\n", " questions=[\n", - " rg.LabelQuestion(name=\"relevant\", title=\"Is the text relevant?\", labels=[\"Yes\", \"No\"], required=True),\n", - " rg.MultiLabelQuestion(name=\"question-multi\", title=\"Mark which is correct\", labels=[\"flag-pos\", \"flag-ner\"], required=True),\n", - " rg.TextQuestion(name=\"dependency-correction\", title=\"Write the correct answer if needed\", use_markdown=True),\n", - " rg.TextQuestion(name=\"ner-correction\", title=\"Write the correct answer if needed\", use_markdown=True)\n", - " ]\n", + " rg.LabelQuestion(\n", + " name=\"relevant\",\n", + " title=\"Is the text relevant?\",\n", + " labels=[\"Yes\", \"No\"],\n", + " required=True,\n", + " ),\n", + " rg.MultiLabelQuestion(\n", + " name=\"question-multi\",\n", + " title=\"Mark which is correct\",\n", + " labels=[\"flag-pos\", \"flag-ner\"],\n", + " required=True,\n", + " ),\n", + " rg.TextQuestion(\n", + " name=\"dependency-correction\",\n", + " title=\"Write the correct answer if needed\",\n", + " use_markdown=True,\n", + " ),\n", + " rg.TextQuestion(\n", + " name=\"ner-correction\",\n", + " title=\"Write the correct answer if needed\",\n", + " use_markdown=True,\n", + " ),\n", + " ],\n", ")\n", "dataset_spacy" ] @@ -472,11 +491,11 @@ "outputs": [], "source": [ "# Load the custom pipeline\n", - "nlp = spacy.load(\n", - " \"en_core_web_sm\", \n", - " exclude=[\"ner\"]\n", + "nlp = spacy.load(\"en_core_web_sm\", exclude=[\"ner\"])\n", + "nlp.add_pipe(\n", + " \"span_marker\",\n", + " config={\"model\": \"tomaarsen/span-marker-bert-tiny-fewnerd-coarse-super\"},\n", ")\n", - "nlp.add_pipe(\"span_marker\", config={\"model\": \"tomaarsen/span-marker-bert-tiny-fewnerd-coarse-super\"})\n", "\n", "# Read the dataset and run the pipeline\n", "texts = [\" \".join(x[\"tokens\"]) for x in dataset_fewnerd]\n", @@ -492,11 +511,11 @@ "# Define the function to set the correct width and height of the SVG element\n", "def 
wrap_in_max_width(html):\n", " html = html.replace(\"max-width: none;\", \"\")\n", - " \n", + "\n", " # Remove existing width and height setting based on regex width=\"/d\"\n", " html = re.sub(r\"width=\\\"\\d+\\\"\", \"overflow-x: auto;\", html)\n", " html = re.sub(r\"height=\\\"\\d+\\\"\", \"\", html)\n", - " \n", + "\n", " # Find the SVG element in the HTML output\n", " svg_start = html.find(\"\") + len(\"\")\n", @@ -520,23 +539,27 @@ "for doc in docs:\n", " record = rg.FeedbackRecord(\n", " fields={\n", - " \"text\": doc.text, \n", - " \"dependency-tree\": displacy.render(doc, style=\"dep\", jupyter=False), \n", - " \"entities\": displacy.render(doc, style=\"ent\", jupyter=False)\n", + " \"text\": doc.text,\n", + " \"dependency-tree\": displacy.render(doc, style=\"dep\", jupyter=False),\n", + " \"entities\": displacy.render(doc, style=\"ent\", jupyter=False),\n", " },\n", - " suggestions=[{\n", - " \"question_name\": \"dependency-correction\", \n", - " \"value\": pd.DataFrame([{\"Label\": token.dep_, \"Text\": token.text} for token in doc]).to_markdown(index=False)\n", - "\n", + " suggestions=[\n", + " {\n", + " \"question_name\": \"dependency-correction\",\n", + " \"value\": pd.DataFrame(\n", + " [{\"Label\": token.dep_, \"Text\": token.text} for token in doc]\n", + " ).to_markdown(index=False),\n", " },\n", " {\n", - " \"question_name\": \"ner-correction\", \n", - " \"value\": pd.DataFrame([{\"Label\": ent.label_, \"Text\": ent.text} for ent in doc.ents]).to_markdown(index=False),\n", - " }\n", - " ]\n", + " \"question_name\": \"ner-correction\",\n", + " \"value\": pd.DataFrame(\n", + " [{\"Label\": ent.label_, \"Text\": ent.text} for ent in doc.ents]\n", + " ).to_markdown(index=False),\n", + " },\n", + " ],\n", " )\n", " records.append(record)\n", - " \n", + "\n", "dataset_spacy.add_records(records)" ] }, @@ -547,7 +570,9 @@ "outputs": [], "source": [ "# Push the dataset to Argilla\n", - "dataset_spacy = 
dataset_spacy.push_to_argilla(name=\"exploiting_displacy\", workspace=\"admin\")" + "dataset_spacy = dataset_spacy.push_to_argilla(\n", + " name=\"exploiting_displacy\", workspace=\"admin\"\n", + ")" ] }, { @@ -602,7 +627,14 @@ "# Configure the FeedbackDataset\n", "ds_multi_modal = rg.FeedbackDataset(\n", " fields=[rg.TextField(name=\"content\", use_markdown=True, required=True)],\n", - " questions=[rg.TextQuestion(name=\"description\", title=\"Describe the content of the media:\", use_markdown=True, required=True)],\n", + " questions=[\n", + " rg.TextQuestion(\n", + " name=\"description\",\n", + " title=\"Describe the content of the media:\",\n", + " use_markdown=True,\n", + " required=True,\n", + " )\n", + " ],\n", ")\n", "ds_multi_modal" ] @@ -622,9 +654,17 @@ "source": [ "# Add the records\n", "records = [\n", - " rg.FeedbackRecord(fields={\"content\": video_to_html(\"/content/snapshot.mp4\", autoplay=True)}),\n", - " rg.FeedbackRecord(fields={\"content\": audio_to_html(\"/content/sea.wav\", autoplay=True, loop=True)}),\n", - " rg.FeedbackRecord(fields={\"content\": image_to_html(\"/content/peacock.jpg\", width=\"50%\", height=\"50%\")}),\n", + " rg.FeedbackRecord(\n", + " fields={\"content\": video_to_html(\"/content/snapshot.mp4\", autoplay=True)}\n", + " ),\n", + " rg.FeedbackRecord(\n", + " fields={\"content\": audio_to_html(\"/content/sea.wav\", autoplay=True, loop=True)}\n", + " ),\n", + " rg.FeedbackRecord(\n", + " fields={\n", + " \"content\": image_to_html(\"/content/peacock.jpg\", width=\"50%\", height=\"50%\")\n", + " }\n", + " ),\n", "]\n", "ds_multi_modal.add_records(records)" ] @@ -706,24 +746,24 @@ " questions=[\n", " rg.TextQuestion(\n", " name=\"description\",\n", - " title=\"Describe the content of the media.\", \n", - " use_markdown=True, \n", - " required=True\n", + " title=\"Describe the content of the media.\",\n", + " use_markdown=True,\n", + " required=True,\n", " ),\n", " rg.RatingQuestion(\n", " name=\"quality\",\n", " 
description=\"Rate the overall quality of the content on a scale from 1 to 5.\",\n", " required=True,\n", - " values=[1, 2, 3, 4, 5]\n", + " values=[1, 2, 3, 4, 5],\n", " ),\n", " rg.LabelQuestion(\n", " name=\"age_group\",\n", " description=\"Select the most appropriate age group for this content.\",\n", " required=True,\n", - " labels = [\"Children\", \"Teens\", \"Adults\", \"All Ages\"]\n", + " labels=[\"Children\", \"Teens\", \"Adults\", \"All Ages\"],\n", " ),\n", " ],\n", - " metadata_properties = [\n", + " metadata_properties=[\n", " rg.TermsMetadataProperty(\n", " name=\"groups\",\n", " title=\"Annotation groups\",\n", @@ -739,7 +779,7 @@ " title=\"Original dataset source\",\n", " ),\n", " ],\n", - " allow_extra_metadata = False\n", + " allow_extra_metadata=False,\n", ")\n", "\n", "try:\n", @@ -773,7 +813,9 @@ "# Download the dataset\n", "hf_dataset_identifier = \"sayakpaul/ucf101-subset\"\n", "filename = \"UCF101_subset.tar.gz\"\n", - "file_path = hf_hub_download(repo_id=hf_dataset_identifier, filename=filename, repo_type=\"dataset\")" + "file_path = hf_hub_download(\n", + " repo_id=hf_dataset_identifier, filename=filename, repo_type=\"dataset\"\n", + ")" ] }, { @@ -784,7 +826,7 @@ "source": [ "# Unpack the files\n", "with tarfile.open(file_path) as t:\n", - " t.extractall(\".\")" + " t.extractall(\".\")" ] }, { @@ -804,22 +846,24 @@ "# Iterate over the directories (each corresponding to a class) in the 'train' folder.\n", "for folder in os.listdir(base_directory):\n", " folder_path = os.path.join(base_directory, folder)\n", - " \n", + "\n", " # Check if it's a directory.\n", " if os.path.isdir(folder_path):\n", " # Get all .avi files in the directory.\n", " avi_files = glob.glob(os.path.join(folder_path, \"*.avi\"))\n", - " \n", + "\n", " # Randomly select 2 .avi files.\n", " selected_files = random.sample(avi_files, 2)\n", - " \n", + "\n", " for avi_file in selected_files:\n", " # Define the output .mp4 file path.\n", - " mp4_file = 
os.path.join(output_directory, os.path.basename(avi_file).replace(\".avi\", \".mp4\"))\n", - " \n", + " mp4_file = os.path.join(\n", + " output_directory, os.path.basename(avi_file).replace(\".avi\", \".mp4\")\n", + " )\n", + "\n", " # Command to convert .avi to .mp4 using ffmpeg.\n", - " command = f\"ffmpeg -i \\\"{avi_file}\\\" -c:v libx264 -c:a aac \\\"{mp4_file}\\\"\"\n", - " \n", + " command = f'ffmpeg -i \"{avi_file}\" -c:v libx264 -c:a aac \"{mp4_file}\"'\n", + "\n", " try:\n", " subprocess.run(command, check=True, shell=True)\n", " print(f\"Converted {avi_file} to {mp4_file}\")\n", @@ -885,7 +929,7 @@ "metadata": {}, "outputs": [], "source": [ - "my_image_dataset = load_dataset(\"zishuod/pokemon-icons\", split=\"train[:20]\")\n", + "my_image_dataset = load_dataset(\"zishuod/pokemon-icons\", split=\"train[:20]\")\n", "my_image_dataset = my_image_dataset.shuffle()" ] }, @@ -940,14 +984,17 @@ "\n", "# Iterate over all the entries in random order\n", "for entry in entries:\n", - " \n", " # Define the full path\n", " fullPath = os.path.join(random_dir, entry)\n", "\n", " # Add the records to the FeedbackDataset\n", " record = rg.FeedbackRecord(\n", " fields={\"content\": video_to_html(fullPath)},\n", - " metadata={\"groups\":\"group-a\", \"media\":\"video\", \"source-dataset\":\"https://huggingface.co/datasets/sayakpaul/ucf101-subset\"}\n", + " metadata={\n", + " \"groups\": \"group-a\",\n", + " \"media\": \"video\",\n", + " \"source-dataset\": \"https://huggingface.co/datasets/sayakpaul/ucf101-subset\",\n", + " },\n", " )\n", " try:\n", " multi_modal_dataset.add_records(record, show_progress=True)\n", @@ -983,11 +1030,14 @@ "source": [ "# Iterate over the samples in the dataset\n", "for entry in my_audio_dataset:\n", - " \n", " # Add the records to the FeedbackDataset\n", " record = rg.FeedbackRecord(\n", " fields={\"content\": audio_to_html(entry[\"audio\"][\"path\"])},\n", - " metadata={\"groups\":\"group-b\", \"media\":\"audio\", 
\"source-dataset\":\"https://huggingface.co/datasets/ccmusic-database/bel_folk\"}\n", + " metadata={\n", + " \"groups\": \"group-b\",\n", + " \"media\": \"audio\",\n", + " \"source-dataset\": \"https://huggingface.co/datasets/ccmusic-database/bel_folk\",\n", + " },\n", " )\n", " try:\n", " multi_modal_dataset.add_records(record, show_progress=True)\n", @@ -1027,14 +1077,17 @@ "# Iterate over the samples in the dataset\n", "records = []\n", "for entry in my_image_dataset:\n", - " \n", " # Save the image to the temporary path\n", " entry[\"image\"].save(temp_img_path, format=\"png\")\n", - " \n", + "\n", " # Add the records to the FeedbackDataset\n", " record = rg.FeedbackRecord(\n", " fields={\"content\": image_to_html(temp_img_path, file_type=\"png\")},\n", - " metadata={\"groups\":\"group-c\", \"media\":\"image\", \"source-dataset\":\"https://huggingface.co/datasets/zishuod/pokemon-icons\"}\n", + " metadata={\n", + " \"groups\": \"group-c\",\n", + " \"media\": \"image\",\n", + " \"source-dataset\": \"https://huggingface.co/datasets/zishuod/pokemon-icons\",\n", + " },\n", " )\n", " try:\n", " multi_modal_dataset.add_records(record, show_progress=True)\n", @@ -1100,17 +1153,20 @@ " fields=[\n", " rg.TextField(name=\"content\", use_markdown=True, required=True),\n", " ],\n", - " questions=[\n", - " rg.TextQuestion(name=\"description\", use_markdown=True, required=True)\n", - " ],\n", + " questions=[rg.TextQuestion(name=\"description\", use_markdown=True, required=True)],\n", ")\n", "\n", "# Push the dataset to Argilla\n", - "ds_pdf = ds_pdf.push_to_argilla(name='analyze_pdf_dataset', workspace='argilla')\n", + "ds_pdf = ds_pdf.push_to_argilla(name=\"analyze_pdf_dataset\", workspace=\"argilla\")\n", "\n", "# Add the records using pdf_to_html\n", "records = [\n", - " rg.FeedbackRecord(fields={\"content\": pdf_to_html(file_source=file_url, width=\"700px\", height=\"700px\")})]\n", + " rg.FeedbackRecord(\n", + " fields={\n", + " \"content\": 
pdf_to_html(file_source=file_url, width=\"700px\", height=\"700px\")\n", + " }\n", + " )\n", + "]\n", "ds_pdf.add_records(records)" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-bias-ethics-dpo.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-bias-ethics-dpo.ipynb index 32db760440..c036712943 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-bias-ethics-dpo.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-bias-ethics-dpo.ipynb @@ -174,11 +174,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"argilla.apikey\",\n", - " workspace=\"argilla\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"argilla.apikey\", workspace=\"argilla\")" ] }, { @@ -201,7 +197,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -214,7 +210,7 @@ "outputs": [], "source": [ "# Your openAI key is needed for testing the model\n", - "os.environ['OPENAI_API_KEY'] = 'sk-...'\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" ] }, @@ -235,9 +231,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -268,7 +267,9 @@ "outputs": [], "source": [ "# Indicate the URL to the report\n", - "REPORT_URL = \"https://commission.europa.eu/system/files/2023-01/report-migration-asylum-2022.pdf\"" + "REPORT_URL = (\n", + " \"https://commission.europa.eu/system/files/2023-01/report-migration-asylum-2022.pdf\"\n", + ")" ] }, { @@ -280,16 +281,21 @@ "# Indicate the name of the query column\n", "TEXT_COLUMN_NAME = \"query\"\n", "\n", - "giskard_dataset = Dataset(pd.DataFrame({\n", - " TEXT_COLUMN_NAME: [\n", - " \"According to the migration and asylum report, what are the key challenges in Europe?\",\n", - " \"How can migration influence in Europe?\",\n", - " \"What strategies does the migration and asylum report recommend for managing migration in Europe?\",\n", - " \"What are the main reasons for migration?\",\n", - " \"How does the report assess the effectiveness of current asylum procedures in Europe?\",\n", - " \"How should the cross-border cooperation on migration be improved?\",\n", - " ]\n", - "}), target=None)" + "giskard_dataset = Dataset(\n", + " pd.DataFrame(\n", + " {\n", + " TEXT_COLUMN_NAME: [\n", + " \"According to the migration and asylum report, what are the key challenges in Europe?\",\n", + " \"How can migration influence in Europe?\",\n", + " \"What strategies does the migration and asylum report recommend for managing migration in Europe?\",\n", + " \"What are the main reasons for migration?\",\n", + " \"How does the report assess the effectiveness of current asylum procedures in Europe?\",\n", + " \"How should the cross-border cooperation on migration be improved?\",\n", + " ]\n", + " }\n", + " ),\n", + " target=None,\n", + ")" ] }, { @@ -337,18 +343,27 @@ "source": [ "# Pre-process the report to work as context\n", "context_storage_cache = None\n", + "\n", + "\n", "def 
get_context_storage() -> FAISS:\n", " global context_storage_cache\n", " if context_storage_cache is None:\n", - " text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, add_start_index=True)\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=1000, chunk_overlap=100, add_start_index=True\n", + " )\n", " docs = PyPDFLoader(REPORT_URL).load_and_split(text_splitter)\n", " context_storage_cache = FAISS.from_documents(docs, OpenAIEmbeddings())\n", " return context_storage_cache\n", "\n", + "\n", "# Create the chain\n", "llm = OpenAI(model=LLM_NAME, temperature=0)\n", - "prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=[\"question\", \"context\"])\n", - "qa_system = RetrievalQA.from_llm(llm=llm, retriever=get_context_storage().as_retriever(), prompt=prompt)" + "prompt = PromptTemplate(\n", + " template=PROMPT_TEMPLATE, input_variables=[\"question\", \"context\"]\n", + ")\n", + "qa_system = RetrievalQA.from_llm(\n", + " llm=llm, retriever=get_context_storage().as_retriever(), prompt=prompt\n", + ")" ] }, { @@ -369,7 +384,9 @@ } ], "source": [ - "qa_system(\"According to the migration and asylum report, what are the key challenges in Europe?\")" + "qa_system(\n", + " \"According to the migration and asylum report, what are the key challenges in Europe?\"\n", + ")" ] }, { @@ -405,13 +422,14 @@ " chain = load_chain(src.joinpath(\"model.json\"), retriever=db.as_retriever())\n", " return chain\n", "\n", + "\n", "# Wrap up the QA chain\n", "giskard_model = FAISSRAGModel(\n", " model=qa_system,\n", " model_type=\"text_generation\",\n", " name=\"Migration and Asylum Question Answering\",\n", " description=\"This model answers questions about migration and asylum in Europe based on the Migration and Asylum Report from the European Commission.\",\n", - " feature_names=[TEXT_COLUMN_NAME]\n", + " feature_names=[TEXT_COLUMN_NAME],\n", ")" ] }, @@ -2349,10 +2367,10 @@ "outputs": [], "source": [ "# Save the results in 
html\n", - "results.to_html('results.html')\n", + "results.to_html(\"results.html\")\n", "\n", "# Save the results in avidoc\n", - "results.to_avid('results.avidoc')" + "results.to_avid(\"results.avidoc\")" ] }, { @@ -2376,34 +2394,39 @@ "outputs": [], "source": [ "# Indicate the path of the avidoc file\n", - "filename = 'results.avidoc'\n", + "filename = \"results.avidoc\"\n", "\n", "# Read and process the avidoc file\n", "data_list = []\n", - "with open(filename, 'r') as file:\n", + "with open(filename, \"r\") as file:\n", " lines = file.readlines()\n", "\n", " # Note that each test type is saved in a different line\n", " for line in lines:\n", " data = json.loads(line)\n", "\n", - " for metric in data.get('metrics', []):\n", - " for example in metric.get('results', {}).get('examples', []):\n", - " \n", + " for metric in data.get(\"metrics\", []):\n", + " for example in metric.get(\"results\", {}).get(\"examples\", []):\n", " # Check for input_vars query\n", - " text = example.get('input_vars', {}).get('query', '')\n", + " text = example.get(\"input_vars\", {}).get(\"query\", \"\")\n", " if text:\n", - " model_output = example.get('model_output', '')\n", - " data_list.append({'input_question': text, 'model_output': model_output})\n", - " \n", + " model_output = example.get(\"model_output\", \"\")\n", + " data_list.append(\n", + " {\"input_question\": text, \"model_output\": model_output}\n", + " )\n", + "\n", " # Check for input_1 and input_2 queries (Hallucination and Misinformation tab)\n", - " input_1_text = example.get('input_1', {}).get('query', '')\n", - " input_2_text = example.get('input_2', {}).get('query', '')\n", + " input_1_text = example.get(\"input_1\", {}).get(\"query\", \"\")\n", + " input_2_text = example.get(\"input_2\", {}).get(\"query\", \"\")\n", " if input_1_text and input_2_text:\n", - " output_1 = example.get('output_1', '')\n", - " output_2 = example.get('output_2', '')\n", - " data_list.append({'input_question': input_1_text, 
'model_output': output_1})\n", - " data_list.append({'input_question': input_2_text, 'model_output': output_2})\n", + " output_1 = example.get(\"output_1\", \"\")\n", + " output_2 = example.get(\"output_2\", \"\")\n", + " data_list.append(\n", + " {\"input_question\": input_1_text, \"model_output\": output_1}\n", + " )\n", + " data_list.append(\n", + " {\"input_question\": input_2_text, \"model_output\": output_2}\n", + " )\n", "\n", "# Create a dataframe with input questions and model outputs\n", "df_data = pd.DataFrame(data_list)" @@ -2511,8 +2534,14 @@ "dataset = rg.FeedbackDataset(\n", " fields=[rg.TextField(name=\"instruction\"), rg.TextField(name=\"response\")],\n", " questions=[\n", - " rg.TextQuestion(name=\"new-instruction\", title=\"Write a helpful, harmless, accurate instruction for the user response\"),\n", - " rg.TextQuestion(name=\"new-response\", title=\"Write a helpful, harmless, accurate response to the user question\"),\n", + " rg.TextQuestion(\n", + " name=\"new-instruction\",\n", + " title=\"Write a helpful, harmless, accurate instruction for the user response\",\n", + " ),\n", + " rg.TextQuestion(\n", + " name=\"new-response\",\n", + " title=\"Write a helpful, harmless, accurate response to the user question\",\n", + " ),\n", " ],\n", ")\n", "dataset = dataset.push_to_argilla(name=\"bias_dataset\", workspace=\"argilla\")" @@ -2527,17 +2556,17 @@ "# Create the records and add them to the dataset\n", "records = [\n", " rg.FeedbackRecord(\n", - " fields={\"instruction\": row['input_question'], \"response\": row['model_output']},\n", - " suggestions = [\n", - " {\n", - " \"question_name\": \"new-instruction\",\n", - " \"value\": row['input_question'],\n", - " },\n", - " {\n", - " \"question_name\": \"new-response\",\n", - " \"value\": row['model_output'],\n", - " }\n", - " ],\n", + " fields={\"instruction\": row[\"input_question\"], \"response\": row[\"model_output\"]},\n", + " suggestions=[\n", + " {\n", + " \"question_name\": 
\"new-instruction\",\n", + " \"value\": row[\"input_question\"],\n", + " },\n", + " {\n", + " \"question_name\": \"new-response\",\n", + " \"value\": row[\"model_output\"],\n", + " },\n", + " ],\n", " )\n", " for _, row in df_data.iterrows()\n", "]\n", @@ -2616,7 +2645,9 @@ "outputs": [], "source": [ "# Load the annotated dataset\n", - "annotated_dataset = rg.FeedbackDataset.from_argilla(name=\"bias_dataset\", workspace=\"argilla\")" + "annotated_dataset = rg.FeedbackDataset.from_argilla(\n", + " name=\"bias_dataset\", workspace=\"argilla\"\n", + ")" ] }, { @@ -2630,6 +2661,7 @@ "Instruct: {instruction}\\n\n", "Output: {response}\"\"\"\n", "\n", + "\n", "def formatting_func(sample: Dict[str, Any]) -> Iterator[Tuple[str, str]]:\n", " # Our annotators were asked to provide new responses, which we assume are better than the originals\n", " og_instruction = sample[\"instruction\"]\n", @@ -2727,13 +2759,17 @@ "source": [ "compute_dtype = getattr(torch, \"float16\")\n", "bnb_config = BitsAndBytesConfig(\n", - " load_in_4bit=True,\n", - " bnb_4bit_quant_type='nf4',\n", - " bnb_4bit_compute_dtype='float16',\n", - " bnb_4bit_use_double_quant=False,\n", - " )\n", + " load_in_4bit=True,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_compute_dtype=\"float16\",\n", + " bnb_4bit_use_double_quant=False,\n", + ")\n", "model = AutoModelForCausalLM.from_pretrained(\n", - " model_name, torch_dtype=torch.float16, quantization_config=bnb_config, trust_remote_code=True, device_map={\"\": 0}\n", + " model_name,\n", + " torch_dtype=torch.float16,\n", + " quantization_config=bnb_config,\n", + " trust_remote_code=True,\n", + " device_map={\"\": 0},\n", ")\n", "model.config.pad_token_id = tokenizer.pad_token_id\n", "model.config.use_cache = False\n", @@ -2754,13 +2790,17 @@ "outputs": [], "source": [ "bnb_config = BitsAndBytesConfig(\n", - " load_in_4bit=True,\n", - " bnb_4bit_quant_type='nf4',\n", - " bnb_4bit_compute_dtype='float16',\n", - " bnb_4bit_use_double_quant=False,\n", - " 
)\n", + " load_in_4bit=True,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_compute_dtype=\"float16\",\n", + " bnb_4bit_use_double_quant=False,\n", + ")\n", "model_ref = AutoModelForCausalLM.from_pretrained(\n", - " model_name, quantization_config=bnb_config, torch_dtype=torch.float16, trust_remote_code=True, device_map={\"\": 0}\n", + " model_name,\n", + " quantization_config=bnb_config,\n", + " torch_dtype=torch.float16,\n", + " trust_remote_code=True,\n", + " device_map={\"\": 0},\n", ")" ] }, @@ -2781,7 +2821,7 @@ " lora_alpha=16,\n", " lora_dropout=0.5,\n", " r=32,\n", - " target_modules=['k_proj', 'q_proj', 'v_proj', 'fc1', 'fc2'],\n", + " target_modules=[\"k_proj\", \"q_proj\", \"v_proj\", \"fc1\", \"fc2\"],\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", ")" @@ -2829,7 +2869,7 @@ " logging_steps=10,\n", " learning_rate=1e-5,\n", " eval_steps=20,\n", - " num_train_epochs=1, # Modified for the tutorial purpose\n", + " num_train_epochs=1, # Modified for the tutorial purpose\n", " max_steps=100,\n", " warmup_steps=20,\n", " lr_scheduler_type=\"linear\",\n", diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb index 0973bffe53..4092e3327d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb @@ -154,8 +154,8 @@ "\n", "import ollama\n", "\n", - "nltk.download('punkt')\n", - "nltk.download('stopwords')" + "nltk.download(\"punkt\")\n", + "nltk.download(\"stopwords\")" ] }, { @@ -174,11 +174,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " 
api_key=\"argilla.apikey\",\n", - " workspace=\"argilla\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"argilla.apikey\", workspace=\"argilla\")" ] }, { @@ -201,7 +197,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -224,9 +220,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -250,7 +249,9 @@ "outputs": [], "source": [ "# Load the dataset\n", - "hf_dataset_games = load_dataset(\"LoganKells/amazon_product_reviews_video_games\", split= \"train[:10000]\")" + "hf_dataset_games = load_dataset(\n", + " \"LoganKells/amazon_product_reviews_video_games\", split=\"train[:10000]\"\n", + ")" ] }, { @@ -368,7 +369,7 @@ "df_games = hf_dataset_games.to_pandas()\n", "\n", "# Convert the UnixReviewTime to a datetime\n", - "df_games['datetime'] = pd.to_datetime(df_games['unixReviewTime'], unit='s')\n", + "df_games[\"datetime\"] = pd.to_datetime(df_games[\"unixReviewTime\"], unit=\"s\")\n", "\n", "# Select the columns we want to keep\n", "df_games = df_games[[\"reviewText\", \"overall\", \"datetime\"]]\n", @@ -401,8 +402,8 @@ "# Divide the dataset into two parts\n", "ref_year = 2005\n", "curr_year = 2010\n", - "df_reference = df_games[df_games['datetime'].dt.year < ref_year]\n", - "df_current = df_games[df_games['datetime'].dt.year >= 
curr_year]\n", + "df_reference = df_games[df_games[\"datetime\"].dt.year < ref_year]\n", + "df_current = df_games[df_games[\"datetime\"].dt.year >= curr_year]\n", "\n", "print(f\"Reference dataset: {df_reference.shape[0]} reviews\")\n", "print(f\"Current dataset: {df_current.shape[0]} reviews\")" @@ -430,9 +431,10 @@ " guidelines=None,\n", " metadata_properties=[\n", " rg.TermsMetadataProperty(\n", - " name=\"datetime\",\n", - " title=\"Datetime\",\n", - " )],\n", + " name=\"datetime\",\n", + " title=\"Datetime\",\n", + " )\n", + " ],\n", " vectors_settings=None,\n", " )\n", " return dataset" @@ -445,7 +447,13 @@ "outputs": [], "source": [ "# To convert the rating to a label\n", - "id2label = {0.0: \"very-negative\", 1.0: \"negative\", 2.0: \"neutral\", 3.0: \"positive\", 4.0: \"very-positive\"}" + "id2label = {\n", + " 0.0: \"very-negative\",\n", + " 1.0: \"negative\",\n", + " 2.0: \"neutral\",\n", + " 3.0: \"positive\",\n", + " 4.0: \"very-positive\",\n", + "}" ] }, { @@ -490,20 +498,10 @@ "records = [\n", " rg.FeedbackRecord(\n", " fields={\"text\": row[\"review\"]},\n", - " responses=[\n", - " {\n", - " \"values\": {\n", - " \"label\": {\n", - " \"value\": id2label[row[\"rating\"]]\n", - " }\n", - " }\n", - " }\n", - " ],\n", - " metadata={\n", - " \"datetime\": str(row[\"datetime\"])\n", - " }\n", + " responses=[{\"values\": {\"label\": {\"value\": id2label[row[\"rating\"]]}}}],\n", + " metadata={\"datetime\": str(row[\"datetime\"])},\n", " )\n", - " for _ , row in df_reference.iterrows()\n", + " for _, row in df_reference.iterrows()\n", "]\n", "\n", "ref_rg_ds.add_records(records)\n", @@ -529,7 +527,7 @@ "task = TrainingTask.for_text_classification(\n", " text=ref_rg_ds.field_by_name(\"text\"),\n", " label=ref_rg_ds.question_by_name(\"label\"),\n", - " label_strategy=None\n", + " label_strategy=None,\n", ")\n", "\n", "trainer = ArgillaTrainer(\n", @@ -579,12 +577,12 @@ "metadata": {}, "outputs": [], "source": [ - "modelfile='''\n", + "modelfile = \"\"\"\n", 
"FROM gemma:7b-instruct\n", "SYSTEM You are a professional video game reviewer. Your job is to write a new and ambiguous video game review inspired by the given one. Please provide the new review that is within 128 tokens, ensuring that no sentences are left incomplete. Directly provide the new review without any other information.\n", - "'''\n", + "\"\"\"\n", "\n", - "ollama.create(model='data-drift-simulator', modelfile=modelfile)" + "ollama.create(model=\"data-drift-simulator\", modelfile=modelfile)" ] }, { @@ -607,10 +605,10 @@ "source": [ "# Example of using the model\n", "response = ollama.generate(\n", - " model='data-drift-simulator',\n", - " prompt=\"Installing the game was a struggle (because of games for windows live bugs).Some championship races and cars can only be 'unlocked' by buying them as an addon to the game. I paid nearly 30 dollars when the game was new. I don\\'t like the idea that I have to keep paying to keep playing.I noticed no improvement in the physics or graphics compared to Dirt 2.I tossed it in the garbage and vowed never to buy another codemasters game. I\\'m really tired of arcade style rally/racing games anyway.I\\'ll continue to get my fix from Richard Burns Rally, and you should to. :)http://www.amazon.com/Richard-Burns-Rally-PC/dp/B000C97156/ref=sr_1_1?ie=UTF8&qid;=1341886844&sr;=8-1&keywords;=richard+burns+rallyThank you for reading my review! If you enjoyed it, be sure to rate it as helpful.\",\n", - " options={'num_predict':128}\n", - " )\n", + " model=\"data-drift-simulator\",\n", + " prompt=\"Installing the game was a struggle (because of games for windows live bugs).Some championship races and cars can only be 'unlocked' by buying them as an addon to the game. I paid nearly 30 dollars when the game was new. I don't like the idea that I have to keep paying to keep playing.I noticed no improvement in the physics or graphics compared to Dirt 2.I tossed it in the garbage and vowed never to buy another codemasters game. 
I'm really tired of arcade style rally/racing games anyway.I'll continue to get my fix from Richard Burns Rally, and you should to. :)http://www.amazon.com/Richard-Burns-Rally-PC/dp/B000C97156/ref=sr_1_1?ie=UTF8&qid;=1341886844&sr;=8-1&keywords;=richard+burns+rallyThank you for reading my review! If you enjoyed it, be sure to rate it as helpful.\",\n", + " options={\"num_predict\": 128},\n", + ")\n", "print(response[\"response\"])" ] }, @@ -632,26 +630,26 @@ "\n", "# Iterate over the reviews\n", "for i, review in enumerate(curr_reviews):\n", - " print(f\"Processing review {i+1} of {len(curr_reviews)}\")\n", - " \n", + " print(f\"Processing review {i + 1} of {len(curr_reviews)}\")\n", + "\n", " if i % 100 == 0 and i > 0:\n", " temperature += 0.1\n", " top_k += 10\n", " print(f\"Temperature: {temperature}, Top K: {top_k}\")\n", "\n", " response = ollama.generate(\n", - " model='data-drift-simulator',\n", + " model=\"data-drift-simulator\",\n", " prompt=review,\n", " options={\n", - " 'temperature': temperature,\n", - " 'mirostat_tau': mirostat_tau,\n", - " 'num_predict': num_predict,\n", - " 'top_k': top_k,\n", - " 'top_p': top_p\n", - " }\n", + " \"temperature\": temperature,\n", + " \"mirostat_tau\": mirostat_tau,\n", + " \"num_predict\": num_predict,\n", + " \"top_k\": top_k,\n", + " \"top_p\": top_p,\n", + " },\n", " )\n", - " print(response['response'])\n", - " rewritten_reviews.append(response['response'])" + " print(response[\"response\"])\n", + " rewritten_reviews.append(response[\"response\"])" ] }, { @@ -902,7 +900,7 @@ "classifier = pipeline(\"text-classification\", model=\"model-drift-simulation\")\n", "\n", "# Assuming current_df is your DataFrame with a column named 'text'\n", - "texts = df_current_test['rewritten_reviews']\n", + "texts = df_current_test[\"rewritten_reviews\"]\n", "\n", "# Initialize lists to store labels and scores\n", "labels = []\n", @@ -911,12 +909,12 @@ "# Iterate over each text in the DataFrame and make predictions\n", "for text 
in texts:\n", " prediction = classifier(text)\n", - " labels.append(prediction[0]['label'])\n", - " scores.append(prediction[0]['score'])\n", + " labels.append(prediction[0][\"label\"])\n", + " scores.append(prediction[0][\"score\"])\n", "\n", "# Add the predicted labels and scores as new columns to the original DataFrame\n", - "df_current_test['predicted_label'] = labels\n", - "df_current_test['predicted_score'] = scores\n", + "df_current_test[\"predicted_label\"] = labels\n", + "df_current_test[\"predicted_score\"] = scores\n", "df_current_test.head()" ] }, @@ -942,26 +940,16 @@ " rg.FeedbackRecord(\n", " fields={\"text\": row[\"rewritten_reviews\"]},\n", " suggestions=[\n", - " {\n", - " \"question_name\": \"label\",\n", - " \"value\": row[\"predicted_label\"],\n", - " \"agent\": \"model_drift\"\n", - " }\n", - " ],\n", - " responses=[\n", " {\n", - " \"values\": {\n", - " \"label\": {\n", - " \"value\": id2label[row[\"rating\"]]\n", - " }\n", - " }\n", + " \"question_name\": \"label\",\n", + " \"value\": row[\"predicted_label\"],\n", + " \"agent\": \"model_drift\",\n", " }\n", " ],\n", - " metadata={\n", - " \"datetime\": str(row[\"datetime\"])\n", - " }\n", + " responses=[{\"values\": {\"label\": {\"value\": id2label[row[\"rating\"]]}}}],\n", + " metadata={\"datetime\": str(row[\"datetime\"])},\n", " )\n", - " for _ , row in df_current_test.iterrows()\n", + " for _, row in df_current_test.iterrows()\n", "]\n", "\n", "curr_rg_ds.add_records(records)\n", @@ -1009,7 +997,11 @@ ], "source": [ "# Compute the model metrics\n", - "model_metrics_unified = curr_rg_ds.compute_model_metrics(question_name=\"label\", metric_names=[\"accuracy\", \"precision\", \"recall\", \"f1-score\", \"confusion-matrix\"], strategy=\"majority\")\n", + "model_metrics_unified = curr_rg_ds.compute_model_metrics(\n", + " question_name=\"label\",\n", + " metric_names=[\"accuracy\", \"precision\", \"recall\", \"f1-score\", \"confusion-matrix\"],\n", + " strategy=\"majority\",\n", + ")\n", 
"model_metrics_unified" ] }, @@ -1046,15 +1038,16 @@ "def clean_review(review):\n", " review = review.lower()\n", " review = re.sub(r\"http\\S+\", \"\", review)\n", - " review = re.sub(r'[^a-zA-Z\\s]', '', review)\n", - " \n", + " review = re.sub(r\"[^a-zA-Z\\s]\", \"\", review)\n", + "\n", " tokens = word_tokenize(review)\n", - " stop_words = set(stopwords.words('english'))\n", + " stop_words = set(stopwords.words(\"english\"))\n", " tokens = [word for word in tokens if word not in stop_words]\n", - " cleaned_review = ' '.join(tokens)\n", - " \n", + " cleaned_review = \" \".join(tokens)\n", + "\n", " return cleaned_review\n", "\n", + "\n", "# Clean the list of reviews\n", "cleaned_ref_reviews = [clean_review(review) for review in ref_reviews]\n", "cleaned_rew_reviews = [clean_review(review) for review in rewritten_reviews]" @@ -1102,7 +1095,9 @@ "topic_model = BERTopic(verbose=True)\n", "topics, probs = topic_model.fit_transform(total_reviews)\n", "\n", - "topics_over_time = topic_model.topics_over_time(total_reviews, total_datetime, nr_bins=20)" + "topics_over_time = topic_model.topics_over_time(\n", + " total_reviews, total_datetime, nr_bins=20\n", + ")" ] }, { @@ -2483,10 +2478,7 @@ "outputs": [], "source": [ "# Create a dataframe with all the data over time\n", - "data = {\n", - " 'datetime': total_datetime,\n", - " 'text': ref_reviews + rewritten_reviews\n", - "}\n", + "data = {\"datetime\": total_datetime, \"text\": ref_reviews + rewritten_reviews}\n", "df_curr_ref = pd.DataFrame(data)\n", "\n", "# Extract the metrics\n", @@ -2525,32 +2517,29 @@ } ], "source": [ - "# Create the time series subplots \n", - "fig,ax = plt.subplots( 2, 2, \n", - "\t\t\t\t\tfigsize = ( 10, 8)) \n", + "# Create the time series subplots\n", + "fig, ax = plt.subplots(2, 2, figsize=(10, 8))\n", "\n", - "sns.lineplot( x = \"datetime\", y = \"token_length_mean\", \n", - "\t\t\tcolor = 'r', data = metrics_df, \n", - "\t\t\tax = ax[0][0]) \n", + "sns.lineplot(\n", + " x=\"datetime\", 
y=\"token_length_mean\", color=\"r\", data=metrics_df, ax=ax[0][0]\n", + ")\n", "\n", - "ax[0][0].tick_params(labelrotation = 25) \n", - "sns.lineplot( x = \"datetime\", y = \"token_length_median\", \n", - "\t\t\tcolor = 'g', data = metrics_df, \n", - "\t\t\tax = ax[0][1]) \n", + "ax[0][0].tick_params(labelrotation=25)\n", + "sns.lineplot(\n", + " x=\"datetime\", y=\"token_length_median\", color=\"g\", data=metrics_df, ax=ax[0][1]\n", + ")\n", "\n", - "ax[0][1].tick_params(labelrotation = 25) \n", - "sns.lineplot(x = \"datetime\", y = \"token_length_std\", \n", - "\t\t\tcolor = 'b', data = metrics_df, \n", - "\t\t\tax = ax[1][0]) \n", + "ax[0][1].tick_params(labelrotation=25)\n", + "sns.lineplot(\n", + " x=\"datetime\", y=\"token_length_std\", color=\"b\", data=metrics_df, ax=ax[1][0]\n", + ")\n", "\n", - "ax[1][0].tick_params(labelrotation = 25) \n", + "ax[1][0].tick_params(labelrotation=25)\n", "\n", - "sns.lineplot(x = \"datetime\", y = \"n_tokens\", \n", - "\t\t\tcolor = 'y', data = metrics_df, \n", - "\t\t\tax = ax[1][1]) \n", + "sns.lineplot(x=\"datetime\", y=\"n_tokens\", color=\"y\", data=metrics_df, ax=ax[1][1])\n", "\n", - "ax[1][1].tick_params(labelrotation = 25) \n", - "fig.tight_layout(pad = 1.2) " + "ax[1][1].tick_params(labelrotation=25)\n", + "fig.tight_layout(pad=1.2)" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/train-reward-model-rlhf.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/train-reward-model-rlhf.ipynb index d28e6c4864..79682b58bb 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/train-reward-model-rlhf.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/train-reward-model-rlhf.ipynb @@ -1,3064 +1,3088 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "l1RUBJobMBYN" - }, - "source": [ - "# 🏆 Train a reward model for RLHF\n", - "\n", - "Collecting comparison data to train a reward model is a crucial 
part of RLHF and LLM evaluation. This phase involves training a reward model to align responses with human preferences. Afterwards, during the reinforcement learning phase, the LLM is fine-tuned to generate better responses based on the reward model. In contrast to how the reward model scores prompt-response pairs, comparison data collection typically requires humans (and machines) to rank several responses to a single prompt.\n", - "\n", - "In this example, we will describe how you can **build a dataset for collecting human preferences and train a reward model using the amazing trl library**. See below the workflow we will be following.\n", - "\n", - "Let's get started!\n", - "\n", - "\"Comparison\n", - "\n", - "
\n", - "\n", - "Note \n", - "\n", - "This tutorial is a Jupyter Notebook. There are two options to run it:\n", - "\n", - "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", - "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", - "\n", - "
" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "l1RUBJobMBYN" + }, + "source": [ + "# 🏆 Train a reward model for RLHF\n", + "\n", + "Collecting comparison data to train a reward model is a crucial part of RLHF and LLM evaluation. This phase involves training a reward model to align responses with human preferences. Afterwards, during the reinforcement learning phase, the LLM is fine-tuned to generate better responses based on the reward model. In contrast to how the reward model scores prompt-response pairs, comparison data collection typically requires humans (and machines) to rank several responses to a single prompt.\n", + "\n", + "In this example, we will describe how you can **build a dataset for collecting human preferences and train a reward model using the amazing trl library**. See below the workflow we will be following.\n", + "\n", + "Let's get started!\n", + "\n", + "\"Comparison\n", + "\n", + "
\n", + "\n", + "Note \n", + "\n", + "This tutorial is a Jupyter Notebook. There are two options to run it:\n", + "\n", + "- Use the Open in Colab button at the top of this page. This option allows you to run the notebook directly on Google Colab. Don't forget to change the runtime type to GPU for faster model training and inference.\n", + "- Download the .ipynb file by clicking on the View source link at the top of the page. This option allows you to download the notebook and run it on your local machine or on a Jupyter notebook tool of your choice.\n", + "\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "hZb87bUJMBYQ" + }, + "source": [ + "## Setup\n", + "\n", + "For this tutorial, you will need to have an Argilla server running. If you don't have one already, check out our [Quickstart](../../../getting_started/quickstart.md) or [Installation](../../../getting_started/quickstart_installation.ipynb) pages. Once you do, complete the following steps:\n", + "\n", + "1. Install the Argilla client and the required third-party libraries using `pip`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dSw4HgoJMBYT" + }, + "outputs": [], + "source": [ + "%pip install -U argilla pandas trl plotly -qqq" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "JoVCJkSlMBYT" + }, + "source": [ + "2. Let's make the necessary imports:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "IbSU2uDhMBYU" + }, + "outputs": [], + "source": [ + "import random\n", + "\n", + "import torch\n", + "from datasets import Dataset, load_dataset\n", + "from transformers import (\n", + " AutoModelForSequenceClassification,\n", + " AutoTokenizer,\n", + " TrainingArguments,\n", + ")\n", + "from trl import RewardTrainer\n", + "\n", + "import argilla as rg" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "A5gHXENrMBYW" + }, + "source": [ + "3. 
If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DZRJUFUhMBYW" + }, + "outputs": [], + "source": [ + "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", + "# you can find the Spaces URL under the Embed this space button\n", + "# Replace api_key if you configured a custom API key\n", + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Set the HF_TOKEN environment variable\n", + "# import os\n", + "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", + "\n", + "# # Replace api_url with the url to your HF Spaces URL\n", + "# # Replace api_key if you configured a custom API key\n", + "# rg.init(\n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", + "# api_key=\"admin.apikey\",\n", + "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enable Telemetry\n", + "\n", + "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from argilla.utils.telemetry import tutorial_running\n", + "\n", + " tutorial_running()\n", + "except ImportError:\n", + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "wmT7RQqlMBYW" + }, + "source": [ + "## Configure the dataset\n", + "As a first step, let's load the dataset and quickly explore the data. This dataset contains a sample of the Dolly curated dataset with the original human responses and generated responses using the Falcon-7b-instruct model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "hZb87bUJMBYQ" - }, - "source": [ - "## Setup\n", - "\n", - "For this tutorial, you will need to have an Argilla server running. If you don't have one already, check out our [Quickstart](../../../getting_started/quickstart.md) or [Installation](../../../getting_started/quickstart_installation.ipynb) pages. Once you do, complete the following steps:\n", - "\n", - "1. 
Install the Argilla client and the required third-party libraries using `pip`:" - ] + "id": "tYpQbwORMBYX", + "outputId": "07f3808f-5965-4cb9-9375-0d3e92c2436d" + }, + "outputs": [], + "source": [ + "hf_dataset = load_dataset(\n", + " \"argilla/dolly-curated-comparison-falcon-7b-instruct\", split=\"train\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 780 }, + "id": "61DMAbebMBYY", + "outputId": "97009132-c55e-4b64-9dc3-bb922b74e414" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dSw4HgoJMBYT" - }, - "outputs": [], - "source": [ - "%pip install -U argilla pandas trl plotly -qqq" + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
promptresponse-1response-2categoryoriginal_responseexternal_id
0What is DepreciationWhat is Depreciation – 10 Important Facts to K...What is Depreciation on a Car?\\nDepreciation i...open_qaDepreciation is the drop in value of an asset ...518
1What do you know about the city of Aberdeen in...Aberdeen, the city in which I've lived for the...As an AI language model, I don't have personal...open_qaAberdeen is a city located in the North East o...351
2Describe thunderstorm season in the United Sta...Describe thunderstorm season in the United Sta...Describe thunderstorm season in the United Sta...information_extractionThunderstorm season in the United States and C...10567
3When did Peloton IPO?\\nOn September 26, 2019, ...When did Peloton IPO?\\nPeloton launched its in...When did Peloton IPO?\\nPeloton IPO'd on May 26...closed_qaPeloton became a public company via an initial...12412
4What is the best way to answer an interview qu...The best way to answer an interview question m...Some of the best ways to answer an interview q...creative_writingThe first recommended step is to ask clarifyin...2521
.....................
7396How do i accept the changeHow do i accept the change in my life\\nAccepti...I's a great opportunity to improve. The only t...brainstormingEmbrace the change and see the difference15010
7397Extract the teams that the footballer Sócrates...Extract the teams that the footballer Sócrates...Extract the teams that the footballer Sócrates...information_extractionBrazil, Botafogo-SP, Corinthians, Fiorentina9970
7398Without quoting directly from the text give me...Without quoting directly from the text give me...Without quoting directly from the text give me...summarizationBrendon Small is a stand-up comedian, Creator...14205
7399Is Killing is Sin ? Is it tureIs Killing is Sin ? Is it ture?\\nKilling can b...Is Killing is Sin ? Is it ture?\\nKilling is no...brainstormingKilling a human being should not be sin becaus...11253
7400Who was Otto von Bismarck?\\nOtto, Prince of Bi...Who was Otto von Bismarck?\\nOtto von Bismarck ...Who was Otto von Bismarck?\\nOtto von Bismarck ...information_extractionOtto von Bismarck was a Prussian and German so...12872
\n", + "

7401 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " prompt \\\n", + "0 What is Depreciation \n", + "1 What do you know about the city of Aberdeen in... \n", + "2 Describe thunderstorm season in the United Sta... \n", + "3 When did Peloton IPO?\\nOn September 26, 2019, ... \n", + "4 What is the best way to answer an interview qu... \n", + "... ... \n", + "7396 How do i accept the change \n", + "7397 Extract the teams that the footballer Sócrates... \n", + "7398 Without quoting directly from the text give me... \n", + "7399 Is Killing is Sin ? Is it ture \n", + "7400 Who was Otto von Bismarck?\\nOtto, Prince of Bi... \n", + "\n", + " response-1 \\\n", + "0 What is Depreciation – 10 Important Facts to K... \n", + "1 Aberdeen, the city in which I've lived for the... \n", + "2 Describe thunderstorm season in the United Sta... \n", + "3 When did Peloton IPO?\\nPeloton launched its in... \n", + "4 The best way to answer an interview question m... \n", + "... ... \n", + "7396 How do i accept the change in my life\\nAccepti... \n", + "7397 Extract the teams that the footballer Sócrates... \n", + "7398 Without quoting directly from the text give me... \n", + "7399 Is Killing is Sin ? Is it ture?\\nKilling can b... \n", + "7400 Who was Otto von Bismarck?\\nOtto von Bismarck ... \n", + "\n", + " response-2 \\\n", + "0 What is Depreciation on a Car?\\nDepreciation i... \n", + "1 As an AI language model, I don't have personal... \n", + "2 Describe thunderstorm season in the United Sta... \n", + "3 When did Peloton IPO?\\nPeloton IPO'd on May 26... \n", + "4 Some of the best ways to answer an interview q... \n", + "... ... \n", + "7396 I's a great opportunity to improve. The only t... \n", + "7397 Extract the teams that the footballer Sócrates... \n", + "7398 Without quoting directly from the text give me... \n", + "7399 Is Killing is Sin ? Is it ture?\\nKilling is no... \n", + "7400 Who was Otto von Bismarck?\\nOtto von Bismarck ... 
\n", + "\n", + " category \\\n", + "0 open_qa \n", + "1 open_qa \n", + "2 information_extraction \n", + "3 closed_qa \n", + "4 creative_writing \n", + "... ... \n", + "7396 brainstorming \n", + "7397 information_extraction \n", + "7398 summarization \n", + "7399 brainstorming \n", + "7400 information_extraction \n", + "\n", + " original_response external_id \n", + "0 Depreciation is the drop in value of an asset ... 518 \n", + "1 Aberdeen is a city located in the North East o... 351 \n", + "2 Thunderstorm season in the United States and C... 10567 \n", + "3 Peloton became a public company via an initial... 12412 \n", + "4 The first recommended step is to ask clarifyin... 2521 \n", + "... ... ... \n", + "7396 Embrace the change and see the difference 15010 \n", + "7397 Brazil, Botafogo-SP, Corinthians, Fiorentina 9970 \n", + "7398 Brendon Small is a stand-up comedian, Creator... 14205 \n", + "7399 Killing a human being should not be sin becaus... 11253 \n", + "7400 Otto von Bismarck was a Prussian and German so... 12872 \n", + "\n", + "[7401 rows x 6 columns]" ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = hf_dataset.to_pandas()\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-OMxM53bMBYZ" + }, + "source": [ + "For reward modeling, we would like to ask labelers to rank two responses for a prompt from best to worst. 
For this, we need to configure the fields to show and questions to ask to labelers.\n", + "\n", + "The dataset will show the users three fields `instruction`, which corresponds to the prompt, `response-1` and `response-2`, which correspond to each of the responses to the instruction.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "VZ5prVQNMBYa" + }, + "outputs": [], + "source": [ + "# list of fields that we will use later for our dataset settings\n", + "fields = [\n", + " rg.TextField(name=\"instruction\", title=\"User instruction\"),\n", + " rg.TextField(name=\"response-1\"),\n", + " rg.TextField(name=\"response-2\"),\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "VKU888AnMBYa" + }, + "source": [ + "In this use case, we will ask the users to select the best response. You can also ask users to rank more than two responses, but let's keep it straightforward for this example. We also add the annotation guidelines for labelers, based on the guidelines of the InstructGPT paper." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "wJvW18MEMBYb" + }, + "outputs": [], + "source": [ + "question = rg.RatingQuestion(\n", + " name=\"choose-best\",\n", + " title=\"Choose the best response:\",\n", + " description=\"Choose the most helpful, harmless, and truthful response. Select 1 for response-1, 2 for response-2, or discard if both are equally good/bad.\",\n", + " values=[1, 2],\n", + " required=True,\n", + ")\n", + "\n", + "guidelines = \"\"\"These guidelines are based on the paper [Training Language Models to Follow Instructions with Human Feedback](https://cdn.openai.com/papers/Training_language_models_to_follow_instructions_with_human_feedback.pdf)\\n\\n**You are given a text-based description of a task, submitted by a user.**\\n\\nThis task description may be in the form of an explicit instruction (e.g. \\\"Write a story about a wise frog.\\\"). 
The task may also be specified indirectly, for example by using several examples of the desired behavior (e.g. given a sequence of movie reviews followed by their sentiment, followed by one movie review without a sentiment, you can assume the task is to predict the sentiment of the final review), or by generating the start of a desired output (e.g. given \\\"There once was a wise frog named Julius,\\\" you can assume the task is to continue the story).\\n\\n**You will also be given several text outputs, intended to help the user with their task.** Your job is to evaluate these outputs to ensure that they are helpful, truthful, and harmless, and select the best according to these criteria.\\n\\nIn case **none of the outputs is correct, conforming to the criteria, or they're exactly equal, use the discard option to not rank them**.\\n\\nFor most tasks, being truthful and harmless is more important than being helpful.\\n\\nBy **helpful**, we mean that the output should follow the user's intention and help the user solve their task. Some examples of helpful behavior:\\n\\n- Writing in clear language.\\n- Answering the question they meant to ask, even if they mis-asked it.\\n- Being sensitive to internationality (e.g., \\\"football\\\" shouldn't mean American football, and \\\"the president\\\" doesn't necessarily mean the US president).\\n- If an instruction is too confusing, asking for clarification and explaining why the instruction is confusing.\\n- Not giving overly long or rambling answers, or repeating information from the question.\\n- Not assuming extraneous extra context outside of what's given (besides things like facts about the world), unless that's an implied part of the task. 
For example, if asked to \\\"write a polite email response to this email: email body,\\\" the output shouldn't assume \\\"I can't make it this time, but am free next weekend.\\\" But if asked to \\\"Write an email to Socrates,\\\" then you can safely assume the output can make up a context.\\n\\nBy **truthful**, we mean that the output contains accurate information and doesn't mislead the user. Some examples of truthful behavior:\\n\\n- On tasks like summarization, where the output should only use information from input, not making up details that are not part of the input description.\\n- Not producing clearly false information about the world (e.g., making up facts or promoting conspiracies). For example, the output should not state that Hillary Clinton has served time in prison.\\n- Avoid generating misleading information or information with questionable authenticity. For example, if the input asks \\\"Why did Hillary Clinton go to jail?\\\", the output should not say \\\"It's not totally clear,\\\" but rather should refute the premise of the question.\\n\\nBy **harmless**, we mean that the output should not cause physical, psychological, or social harm to people; damage to or loss of equipment or property; damage to the environment; or harm to institutions or resources necessary to human well-being. Some examples of harmless behavior:\\n\\n- Treating other humans with kindness, respect, and consideration; not denigrating members of certain groups or using biased language against a particular group.\\n- Not generating abusive, threatening, or offensive language or promoting violence.\\n- Not writing sexual or violent content if it's not asked for.\\n- Not giving bad real-world advice or promoting illegal activity.\\n\\nEvaluating model outputs may involve making trade-offs between these criteria. 
These trade-offs will depend on the task.\"\"\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "RfZuuRpr07n4" + }, + "source": [ + "## Create records\n", + "\n", + "The next step is to build the records for collecting comparison data. This step typically involves generating responses using one or several instruction-tuned models. \n", + "\n", + ".. tip:: When showing responses from two different models to labelers it's recommended to randomly assign different model responses to `response-1` and `response-2` for each record. Otherwise, labelers might find a pattern and be biased towards a specific model. This is especially relevant for model comparison and evaluation but also applies to comparison data for reward modeling. \n", + "\n", + "In this example, we've already generated a dataset using the instructions from the Dolly curated dataset with the Falcon-7B-instruct model. We will use the original human-written response as `response-1` and a response from Falcon as `response-2`. \n", + "\n", + "You can build the records and publish them for labelers as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "JoVCJkSlMBYT" - }, - "source": [ - "2. 
Let's make the necessary imports:" - ] + "id": "qZUeAT-1OFfz", + "outputId": "2b605fd6-3708-4f8c-d18a-4dc1f75f981e" + }, + "outputs": [], + "source": [ + "# build records from hf dataset\n", + "records = [\n", + " rg.FeedbackRecord(\n", + " fields={\n", + " \"instruction\": r[\"prompt\"],\n", + " \"response-1\": r[\"original_response\"],\n", + " \"response-2\": r[\"response-2\"],\n", + " }\n", + " )\n", + " for r in hf_dataset\n", + "]\n", + "\n", + "# create dataset\n", + "dataset = rg.FeedbackDataset(fields=fields, questions=[question], guidelines=guidelines)\n", + "\n", + "# add records and publish\n", + "dataset.add_records(records)\n", + "dataset.push_to_argilla(\"comparison-data-falcon\", workspace=\"admin\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now the dataset is ready for labeling. This is the Feedback UI we have just configured:\n", + "\n", + "\n", + "\"A\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4vUhGWWnufDq" + }, + "source": [ + "Additionally, you can push the dataset to the Hub for reproducibility and reuse. This dataset is available in the Hub, feel free to [read the dataset card](https://huggingface.co/datasets/argilla/comparison-data-falcon-with-feedback) to understand its structure, annotation guidelines, and how to import it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0kqc8fADoX4Q" + }, + "outputs": [], + "source": [ + "# dataset.push_to_huggingface(\"comparison-data-falcon\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "aW40aC5xvhrR" + }, + "source": [ + "## Collect feedback and prepare the dataset\n", + "\n", + "Once the data has been labeled using the Argilla UI, we can retrieve it with the Python SDK and prepare it for training the reward model with TRL.\n", + "\n", + "If you are running this tutorial but haven´t labeled any data points, execute the following cell to retrieve the labeled dataset from the Hugging Face Hub. This dataset already contains ranked responses and can be used for the next steps. The dataset is available in the Hub, feel free to [read the dataset card](https://huggingface.co/datasets/argilla/comparison-data-falcon-with-feedback) to understand its structure, annotation guidelines, responses, and how to import it. \n", + "\n", + "If you have labeled some examples jump and execute the second cell. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238, + "referenced_widgets": [ + "b4a7efcf448247c586546cee7ae4e853", + "ad4dd7f68c00437e878e2965e08457c8", + "e7b96b86f6df4826b04171eaec7fa2a6", + "56021dd10ea447029b2d64a3954f50a8", + "ff6274caa2cb4346b9862dcbf1846184", + "a681213d4f1f4c0db8cf2be709734067", + "626d87ba234a4af9aed53f633cefbd95", + "1c28d5ed9c7a41ad87e7fd83380e05a5", + "dab35d7560824b828e1976691e481301", + "23cccf0f1f2240ecaabb5d6efa7a8bbb", + "f5d4cf0d204f478094976a30309de13e", + "1eca0bb4151543098cfbb98b64191f92", + "299f4ae7c6524b9d8a06f22063d5a92f", + "4ed8d36201904c6cb43d0fcdf7b18f34", + "b9ed9f3bcbfd4403b4c1ea9eb3001543", + "a45c853e3a7c4b65ad3bdae9a67607bc", + "fc422a04dbcf4b979fa7569c171ac971", + "a291ef8013ee445b8f3e36be30e1da69", + "7ba0eca5e7984391b4d32608c85bd236", + "0e746ea7187c4b7382de0e5094d6234a", + "324c48fb64fb4021a4acec428fcf53ac", + "841a58e70b3f4621a4419d2872b3d348", + "3cc6da03d28447d0a7259c460b017296", + "ffc2b422e5284a3180e703cab2ebef1e", + "5151b3406f774c57b66c0651e10edb42", + "b04603b9ced743d991a61e42fc0d8ce6", + "99f75e3565334a0cad80b0ddf434156c", + "e295f279b61f48d683784131a8d533cc", + "254c7bb9621c457182976287a0a5ac66", + "9e60f5105a2f430d9c5f382c54b0610c", + "2b34ee7a285a4dbb8c1001d8162b8e44", + "4d451a340eb341ee88002e9ecce1d2a9", + "8c1c4cac8f0444e4a69bd316efde9080", + "b3dff83de3894147838358514c8ef63c", + "ac4deaed71894ca9b8bdba7e14a34212", + "8ed4b17c0dc4464cb5cc8bcfbc6bc803", + "9a45be3cbddc4d0cbcc3591fe2c2883e", + "51d0e514f6244692b2b1e6dbfcc0a460", + "0432b133dc614d1fa8e2d06edc413302", + "ffe071addb80421dba36b5797a594dbe", + "17ed61c237304d2bb77b80bd4def6466", + "809240690fb44738b6173cc53504bcf1", + "5d3a83515f8944d491fe3305ee01a314", + "52d358e242dd4876a66adbdb89fb03fc", + "dd782a3398d1437f9d3626bdf4b87be4", + "f632551058f744b0b9ad9696397c8d53", + "914b7ab6f21d486fa42be37545ea73d7", + 
"94903b2e56ef43b9b39ddd576a5a06ee", + "902a2c7722484d4283e5ba0757641a0e", + "23ef7fb2393f45d885d11ada8e5a3534", + "3aaa39cccc26416f9c7aeab4d50da8c5", + "04c0309dc09e46419995addb7238f5ee", + "a9dcbe78ae6641d38775818748dd71c4", + "5c23c54cc6bd4475ba755490afb520db", + "bda482b92a8d4229983e72581154eba9" + ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "IbSU2uDhMBYU" - }, - "outputs": [], - "source": [ - "import random\n", - "\n", - "import torch\n", - "from datasets import Dataset, load_dataset\n", - "from transformers import (\n", - " AutoModelForSequenceClassification,\n", - " AutoTokenizer,\n", - " TrainingArguments,\n", - ")\n", - "from trl import RewardTrainer\n", - "\n", - "import argilla as rg" - ] + "id": "p-0_iwa8IA2y", + "outputId": "abbc3e3a-6053-4a0e-c1a1-f3f5f32d686c" + }, + "outputs": [], + "source": [ + "# if you haven't ranked any responses with the UI run this cell\n", + "# otherwise ran the next one\n", + "feedback_dataset = rg.FeedbackDataset.from_huggingface(\n", + " \"argilla/comparison-data-falcon-with-feedback\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "A5gHXENrMBYW" - }, - "source": [ - "3. If you are running Argilla using the Docker quickstart image or Hugging Face Spaces, you need to init the Argilla client with the `URL` and `API_KEY`:" - ] + "id": "TZGfBzgmvg1d", + "outputId": "7efee8d2-79b2-4555-a489-f8a55fcbec31" + }, + "outputs": [], + "source": [ + "# run this cell if you have ranked the responses in the UI\n", + "feedback_dataset = rg.FeedbackDataset.from_argilla(\n", + " \"comparison-data-falcon\", workspace=\"admin\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next step is to prepare the dataset in the standard format for training a reward model. 
In particular, we want to select the chosen and rejected responses from the user feedback. We do this by creating a `TrainingTask` instance for reward modeling using a function that returns chosen-rejected tuples." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 537 }, + "id": "cll7j10swNT2", + "outputId": "0e4974e6-74b1-41be-c54c-92bca9732ddc" + }, + "outputs": [], + "source": [ + "from typing import Any, Dict\n", + "from argilla.feedback import TrainingTask\n", + "from collections import Counter\n", + "\n", + "\n", + "def formatting_func(sample: Dict[str, Any]):\n", + " # sample[\"choose-best\"] => [{'user_id': None, 'value': 1, 'status': 'submitted'}, ...]\n", + " values = [\n", + " annotation[\"value\"]\n", + " for annotation in sample[\"choose-best\"]\n", + " if annotation[\"status\"] == \"submitted\"\n", + " ]\n", + "\n", + " # We will only focus on the annotated records in the dataset\n", + " if Counter(values).most_common(1) != []:\n", + " # values => [1]\n", + " winning_response = Counter(values).most_common(1)[0][0]\n", + " print(Counter(values).most_common(1))\n", + " if winning_response == 1:\n", + " chosen = sample[\"response-1\"]\n", + " rejected = sample[\"response-2\"]\n", + " else:\n", + " chosen = sample[\"response-2\"]\n", + " rejected = sample[\"response-1\"]\n", + " return chosen, rejected\n", + "\n", + "\n", + "task = TrainingTask.for_reward_modeling(formatting_func=formatting_func)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we want, we can observe the resulting dataset by preparing it for training with TRL:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DZRJUFUhMBYW" - }, - "outputs": [], - "source": [ - "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", - "# you can 
find the Spaces URL under the Embed this space button\n", - "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\",\n", - " api_key=\"admin.apikey\"\n", - ")" + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['chosen', 'rejected'],\n", + " num_rows: 7401\n", + "})" ] - }, + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = feedback_dataset.prepare_for_training(framework=\"trl\", task=task)\n", + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" + "data": { + "text/plain": [ + "{'chosen': \"Depreciation is the drop in value of an asset due to wear and tear, age and obsolescence (going out of date) as recorded in an organization's financial records.\",\n", + " 'rejected': 'What is Depreciation – 10 Important Facts to Know?\\nWhen a business buys a new asset, the purchase price of that asset is depreciated over time to reflect its usage and eventual obsolescence. Depreciation expense can be a tax deductible expense and is usually a non-cash expense reported on a company’s income statement and balance sheet. The amount of depreciation expense a company reports each year is the difference between the original purchase price of the asset and what the current value of that asset might be. Here are 10 important facts to know about depreciation:\\n1. Depreciation is a non-cash expense. It is an expense that is reported in a business’s income statement and balance sheet and not a cash flow expense.\\n2. Depreciation is an accounting standard and it is required to be disclosed in a business’s financial statements.\\n3. 
The amount of depreciation is usually a tax expense and not a cash expense reported on a company’s income statement'}" ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "UgoIyJanMBYk" + }, + "source": [ + "This dataset is ready to be used as comparison data to train a reward model.\n", + "\n", + "
\n", + "\n", + "Note\n", + "\n", + "The paper Direct Preference Optimization: Your Language Model is Secretly a Reward Model proposes DPO, a promising method for using comparison data directly to model human preference, eliminating the need for a reward model and the RL step. Nevertheless, the comparison data collected in Argilla can be directly used for DPO.\n", + "\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "F8r9_XJi1K9P" + }, + "source": [ + "## Train the reward model with `trl`\n", + "In this step, we will use the `RewardTrainer` from the trl library. To understand this step, we recommend you to [check the trl docs](https://huggingface.co/docs/trl/reward_trainer). \n", + "\n", + "
\n", + "To run this step, you need to rank some examples using the Argilla UI, or run the step above with the load from Hugging Face call: `feedback_dataset = FeedbackDataset.from_huggingface`\n", + "
\n", + "\n", + "To train a Reward Model, you need to choose a base model to fine-tune. In the literature, the base model is typically the supervised fine-tuned model resulting from the instruction-tuning step. In this example, that would mean using the [Falcon-7B-instruct model](https://huggingface.co/tiiuae/falcon-7b-instruct). However, as Reward Models are essentially classifiers you can use a more lightweight backbone model, for this example we will use `distilroberta-base` but feel free to experiment with other models.\n", + "\n", + "The code below fine-tunes a `SequenceClassification` model with our preference dataset. The most interesting part is the `formatting_func` function. This function combines instructions with chosen and rejected responses, creating two new strings. These strings are tokenized, becoming input for a reward model that learns to distinguish between good and bad responses based on these examples. The model will be optimized to assign higher values to preferred responses and lower values to rejected responses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 516, + "referenced_widgets": [ + "b44d62c0bef242fabab85ff6129b42dc", + "9a872753cf9a4fde9caa69660e85c896", + "2a9d2460cf024776bb0010ff6f1a5fa7", + "8a6d78da941d429b981d82a1b0d249ab", + "8b2770c23c424f8698d7f3ca60bf5d9a", + "9fc9251942db44548dcc9b55312f6963", + "660b25b7377a4bd0a2baf03a437dc060", + "6e00a979fa604747b320018d4d5549ad", + "5ed78905729747d8a3dfe68590950d73", + "fdd275ff03504203b5dbce2ea732fd44", + "7d01e2401fa44199afb31d5dcc443761" + ] }, + "id": "-MtSUam61J_Z", + "outputId": "6680758c-3b9c-444a-e73b-48bbb4c1b4b5" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Set the HF_TOKEN environment variable\n", - "# import os\n", - "# os.environ['HF_TOKEN'] = \"your-hf-token\"\n", - "\n", - "# # Replace 
api_url with the url to your HF Spaces URL\n", - "# # Replace api_key if you configured a custom API key\n", - "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", - "# api_key=\"admin.apikey\",\n", - "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", - "# )" + "data": { + "text/html": [ + "
[08/08/23 16:36:51] INFO     INFO:ArgillaTRLTrainer:{'eval_loss': 0.1626577377319336, 'eval_accuracy':   trl.py:226\n",
+       "                             0.937204591492235, 'eval_runtime': 6.5907, 'eval_samples_per_second':                 \n",
+       "                             224.709, 'eval_steps_per_second': 28.221, 'epoch': 1.0}                               \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08/08/23 16:36:51]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m INFO:ArgillaTRLTrainer:\u001b[1m{\u001b[0m\u001b[32m'eval_loss'\u001b[0m: \u001b[1;36m0.1626577377319336\u001b[0m, \u001b[32m'eval_accuracy'\u001b[0m: \u001b]8;id=234053;file://C:\\code\\argilla\\src\\argilla\\client\\feedback\\training\\frameworks\\trl.py\u001b\\\u001b[2mtrl.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=146316;file://C:\\code\\argilla\\src\\argilla\\client\\feedback\\training\\frameworks\\trl.py#226\u001b\\\u001b[2m226\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[1;36m0.937204591492235\u001b[0m, \u001b[32m'eval_runtime'\u001b[0m: \u001b[1;36m6.5907\u001b[0m, \u001b[32m'eval_samples_per_second'\u001b[0m: \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[1;36m224.709\u001b[0m, \u001b[32m'eval_steps_per_second'\u001b[0m: \u001b[1;36m28.221\u001b[0m, \u001b[32m'epoch'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from argilla.feedback import ArgillaTrainer\n", + "\n", + "model_name = \"distilroberta-base\"\n", + "trainer = ArgillaTrainer(\n", + " dataset=feedback_dataset,\n", + " task=task,\n", + " framework=\"trl\",\n", + " model=model_name,\n", + " train_size=0.8,\n", + ")\n", + "trainer.update_config(\n", + " per_device_train_batch_size=16,\n", + " evaluation_strategy=\"steps\",\n", + " logging_steps=200,\n", + ")\n", + "trainer.train(\"./reward_model\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the Reward Model\n", + "The resulting model is [fully open-source and available on the Hugging Hub](https://huggingface.co/argilla/roberta-base-reward-model-falcon-dolly).\n", + "\n", + "\n", + "This is how you can use it with your own data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\n", + " \"argilla/roberta-base-reward-model-falcon-dolly\"\n", + ")\n", + "\n", + "model = AutoModelForSequenceClassification.from_pretrained(\n", + " \"argilla/roberta-base-reward-model-falcon-dolly\"\n", + ")\n", + "\n", + "\n", + "def get_score(model, tokenizer, prompt, response):\n", + " # Tokenize the input sequences\n", + " inputs = tokenizer.encode_plus(\n", + " prompt,\n", + " response,\n", + " truncation=True,\n", + " padding=\"max_length\",\n", + " max_length=512,\n", + " return_tensors=\"pt\",\n", + " )\n", + "\n", + " # Perform forward pass\n", + " with torch.no_grad():\n", + " outputs = model(**inputs)\n", + "\n", + " # Extract the logits\n", + " logits = outputs.logits\n", + "\n", + " return logits.item()\n", + "\n", + "\n", + "# Example usage\n", + "prompt = \"What is Depreciation\"\n", + "example_less_pref_response = \"What is Depreciation – 10 Important Facts to Know? When a business buys a new asset, the purchase price of that asset is depreciated over time to reflect its usage and eventual obsolescence. Depreciation expense can be a tax deductible expense and is usually a non-cash expense reported on a company’s income statement and balance sheet. The amount of depreciation expense a company reports each year is the difference between the original purchase price of the asset and what the current value of that asset might be. Here are 10 important facts to know about depreciation: 1. Depreciation is a non-cash expense. It is an expense that is reported in a business’s income statement and balance sheet and not a cash flow expense. 2. Depreciation is an accounting standard and it is required to be disclosed in a business’s financial statements. 3. 
The amount of depreciation is usually a tax expense and not a cash expense reported on a company’s income statement\"\n", + "example_preferred_response = \"Depreciation is the drop in value of an asset due to wear and tear, age and obsolescence (going out of date) as recorded in an organization's financial records.\"\n", + "\n", + "score = get_score(model, tokenizer, prompt, example_less_pref_response)\n", + "print(score)\n", + "# >> -3.915163993835449\n", + "\n", + "score = get_score(model, tokenizer, prompt, example_preferred_response)\n", + "print(score)\n", + "# >> 7.460323333740234" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "cxy90ZY4MBYk" + }, + "source": [ + "## Summary\n", + "\n", + "In this tutorial, we learned how to create a comparison dataset by ranking responses from the Dolly dataset and Falcon. With this dataset, we learned how to train a reward model using the `trl` framework." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "BeKXm3s3IFr9" + }, + "source": [ + "## Appendix: How to build the dataset with pre-loaded responses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "efyVbRYXIL6H" + }, + "outputs": [], + "source": [ + "picker = [\"response-1\", \"response-2\"]\n", + "\n", + "\n", + "def get_chosen_and_not_chosen(l):\n", + " # Generate a random index between 0 and length of the list - 1\n", + " chosen_id = random.randint(0, len(l) - 1)\n", + " not_chosen_id = 1 - chosen_id # This will be 0 if chosen_id is 1 and vice versa\n", + "\n", + " return l[chosen_id], l[not_chosen_id], chosen_id\n", + "\n", + "\n", + "records = []\n", + "\n", + "for r in hf_dataset:\n", + " chosen, not_chosen, chosen_id = get_chosen_and_not_chosen(picker)\n", + " chosen_from_falcon, _, _ = get_chosen_and_not_chosen(picker)\n", + "\n", + " record = rg.FeedbackRecord(\n", + " fields={\n", + " \"instruction\": r[\"prompt\"],\n", + " chosen: 
r[\"original_response\"],\n", + " not_chosen: r[chosen_from_falcon],\n", + " },\n", + " responses=[{\"values\": {\"choose-best\": {\"value\": chosen_id + 1}}}],\n", + " external_id=r[\"external_id\"],\n", + " )\n", + " records.append(record)\n", + "\n", + "# create dataset\n", + "dataset = rg.FeedbackDataset(fields=fields, questions=[question], guidelines=guidelines)\n", + "\n", + "# add records and publish\n", + "dataset.add_records(records)\n", + "\n", + "dataset.push_to_huggingface(\"argilla/comparison-data-falcon-with-feedback\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "vscode": { + "interpreter": { + "hash": "2d98cb9bf90a932b5bf8e86e91214497eb0e38eb318595fbd6fbd5460fe92036" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0432b133dc614d1fa8e2d06edc413302": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Enable Telemetry\n", - "\n", - "We gain valuable insights from how you interact with our tutorials. To improve ourselves in offering you the most suitable content, using the following lines of code will help us understand that this tutorial is serving you effectively. Though this is entirely anonymous, you can choose to skip this step if you prefer. For more info, please check out the [Telemetry](../../reference/telemetry.md) page." - ] + "04c0309dc09e46419995addb7238f5ee": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " from argilla.utils.telemetry import tutorial_running\n", - " tutorial_running()\n", - "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" - ] + "0e746ea7187c4b7382de0e5094d6234a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "wmT7RQqlMBYW" - }, - "source": [ - "## Configure the dataset\n", - "As a first step, let's load the dataset and quickly explore the data. This dataset contains a sample of the Dolly curated dataset with the original human responses and generated responses using the Falcon-7b-instruct model." 
- ] + "17ed61c237304d2bb77b80bd4def6466": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tYpQbwORMBYX", - "outputId": "07f3808f-5965-4cb9-9375-0d3e92c2436d" - }, - "outputs": [], - "source": [ - "hf_dataset = load_dataset(\"argilla/dolly-curated-comparison-falcon-7b-instruct\", split=\"train\")" - ] + "1c28d5ed9c7a41ad87e7fd83380e05a5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 780 - }, - "id": "61DMAbebMBYY", - "outputId": "97009132-c55e-4b64-9dc3-bb922b74e414" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
promptresponse-1response-2categoryoriginal_responseexternal_id
0What is DepreciationWhat is Depreciation – 10 Important Facts to K...What is Depreciation on a Car?\\nDepreciation i...open_qaDepreciation is the drop in value of an asset ...518
1What do you know about the city of Aberdeen in...Aberdeen, the city in which I've lived for the...As an AI language model, I don't have personal...open_qaAberdeen is a city located in the North East o...351
2Describe thunderstorm season in the United Sta...Describe thunderstorm season in the United Sta...Describe thunderstorm season in the United Sta...information_extractionThunderstorm season in the United States and C...10567
3When did Peloton IPO?\\nOn September 26, 2019, ...When did Peloton IPO?\\nPeloton launched its in...When did Peloton IPO?\\nPeloton IPO'd on May 26...closed_qaPeloton became a public company via an initial...12412
4What is the best way to answer an interview qu...The best way to answer an interview question m...Some of the best ways to answer an interview q...creative_writingThe first recommended step is to ask clarifyin...2521
.....................
7396How do i accept the changeHow do i accept the change in my life\\nAccepti...I's a great opportunity to improve. The only t...brainstormingEmbrace the change and see the difference15010
7397Extract the teams that the footballer Sócrates...Extract the teams that the footballer Sócrates...Extract the teams that the footballer Sócrates...information_extractionBrazil, Botafogo-SP, Corinthians, Fiorentina9970
7398Without quoting directly from the text give me...Without quoting directly from the text give me...Without quoting directly from the text give me...summarizationBrendon Small is a stand-up comedian, Creator...14205
7399Is Killing is Sin ? Is it tureIs Killing is Sin ? Is it ture?\\nKilling can b...Is Killing is Sin ? Is it ture?\\nKilling is no...brainstormingKilling a human being should not be sin becaus...11253
7400Who was Otto von Bismarck?\\nOtto, Prince of Bi...Who was Otto von Bismarck?\\nOtto von Bismarck ...Who was Otto von Bismarck?\\nOtto von Bismarck ...information_extractionOtto von Bismarck was a Prussian and German so...12872
\n", - "

7401 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " prompt \\\n", - "0 What is Depreciation \n", - "1 What do you know about the city of Aberdeen in... \n", - "2 Describe thunderstorm season in the United Sta... \n", - "3 When did Peloton IPO?\\nOn September 26, 2019, ... \n", - "4 What is the best way to answer an interview qu... \n", - "... ... \n", - "7396 How do i accept the change \n", - "7397 Extract the teams that the footballer Sócrates... \n", - "7398 Without quoting directly from the text give me... \n", - "7399 Is Killing is Sin ? Is it ture \n", - "7400 Who was Otto von Bismarck?\\nOtto, Prince of Bi... \n", - "\n", - " response-1 \\\n", - "0 What is Depreciation – 10 Important Facts to K... \n", - "1 Aberdeen, the city in which I've lived for the... \n", - "2 Describe thunderstorm season in the United Sta... \n", - "3 When did Peloton IPO?\\nPeloton launched its in... \n", - "4 The best way to answer an interview question m... \n", - "... ... \n", - "7396 How do i accept the change in my life\\nAccepti... \n", - "7397 Extract the teams that the footballer Sócrates... \n", - "7398 Without quoting directly from the text give me... \n", - "7399 Is Killing is Sin ? Is it ture?\\nKilling can b... \n", - "7400 Who was Otto von Bismarck?\\nOtto von Bismarck ... \n", - "\n", - " response-2 \\\n", - "0 What is Depreciation on a Car?\\nDepreciation i... \n", - "1 As an AI language model, I don't have personal... \n", - "2 Describe thunderstorm season in the United Sta... \n", - "3 When did Peloton IPO?\\nPeloton IPO'd on May 26... \n", - "4 Some of the best ways to answer an interview q... \n", - "... ... \n", - "7396 I's a great opportunity to improve. The only t... \n", - "7397 Extract the teams that the footballer Sócrates... \n", - "7398 Without quoting directly from the text give me... \n", - "7399 Is Killing is Sin ? Is it ture?\\nKilling is no... \n", - "7400 Who was Otto von Bismarck?\\nOtto von Bismarck ... 
\n", - "\n", - " category \\\n", - "0 open_qa \n", - "1 open_qa \n", - "2 information_extraction \n", - "3 closed_qa \n", - "4 creative_writing \n", - "... ... \n", - "7396 brainstorming \n", - "7397 information_extraction \n", - "7398 summarization \n", - "7399 brainstorming \n", - "7400 information_extraction \n", - "\n", - " original_response external_id \n", - "0 Depreciation is the drop in value of an asset ... 518 \n", - "1 Aberdeen is a city located in the North East o... 351 \n", - "2 Thunderstorm season in the United States and C... 10567 \n", - "3 Peloton became a public company via an initial... 12412 \n", - "4 The first recommended step is to ask clarifyin... 2521 \n", - "... ... ... \n", - "7396 Embrace the change and see the difference 15010 \n", - "7397 Brazil, Botafogo-SP, Corinthians, Fiorentina 9970 \n", - "7398 Brendon Small is a stand-up comedian, Creator... 14205 \n", - "7399 Killing a human being should not be sin becaus... 11253 \n", - "7400 Otto von Bismarck was a Prussian and German so... 
12872 \n", - "\n", - "[7401 rows x 6 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } + "1eca0bb4151543098cfbb98b64191f92": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_299f4ae7c6524b9d8a06f22063d5a92f", + "IPY_MODEL_4ed8d36201904c6cb43d0fcdf7b18f34", + "IPY_MODEL_b9ed9f3bcbfd4403b4c1ea9eb3001543" ], - "source": [ - "df = hf_dataset.to_pandas()\n", - "df" - ] + "layout": "IPY_MODEL_a45c853e3a7c4b65ad3bdae9a67607bc" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "-OMxM53bMBYZ" - }, - "source": [ - "For reward modeling, we would like to ask labelers to rank two responses for a prompt from best to worst. 
For this, we need to configure the fields to show and questions to ask to labelers.\n", - "\n", - "The dataset will show the users three fields `instruction`, which corresponds to the prompt, `response-1` and `response-2`, which correspond to each of the responses to the instruction.\n" - ] + "23cccf0f1f2240ecaabb5d6efa7a8bbb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "VZ5prVQNMBYa" - }, - "outputs": [], - "source": [ - "# list of fields that we will use later for our dataset settings\n", - "fields = [\n", - " rg.TextField(name=\"instruction\", title=\"User instruction\"),\n", - " rg.TextField(name=\"response-1\"),\n", - " rg.TextField(name=\"response-2\")\n", - "]" - ] + "23ef7fb2393f45d885d11ada8e5a3534": { + "model_module": "@jupyter-widgets/base", 
+ "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "VKU888AnMBYa" - }, - "source": [ - "In this use case, we will ask the users to select the best response. You can also ask users to rank more than two responses, but let's keep it straightforward for this example. We also add the annotation guidelines for labelers, based on the guidelines of the InstructGPT paper." 
- ] + "254c7bb9621c457182976287a0a5ac66": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "wJvW18MEMBYb" - }, - "outputs": [], - "source": [ - "question = rg.RatingQuestion(\n", - " name=\"choose-best\",\n", - " title=\"Choose the best response:\",\n", - " description=\"Choose the most helpful, harmless, and truthful response. Select 1 for response-1, 2 for response-2, or discard if both are equally good/bad.\",\n", - " values=[1,2],\n", - " required=True\n", - ")\n", - "\n", - "guidelines=\"\"\"These guidelines are based on the paper [Training Language Models to Follow Instructions with Human Feedback](https://cdn.openai.com/papers/Training_language_models_to_follow_instructions_with_human_feedback.pdf)\\n\\n**You are given a text-based description of a task, submitted by a user.**\\n\\nThis task description may be in the form of an explicit instruction (e.g. \\\"Write a story about a wise frog.\\\"). The task may also be specified indirectly, for example by using several examples of the desired behavior (e.g. given a sequence of movie reviews followed by their sentiment, followed by one movie review without a sentiment, you can assume the task is to predict the sentiment of the final review), or by generating the start of a desired output (e.g. 
given \\\"There once was a wise frog named Julius,\\\" you can assume the task is to continue the story).\\n\\n**You will also be given several text outputs, intended to help the user with their task.** Your job is to evaluate these outputs to ensure that they are helpful, truthful, and harmless, and select the best according to these criteria.\\n\\nIn case **none of the outputs is correct, conforming to the criteria, or they're exactly equal, use the discard option to not rank them**.\\n\\nFor most tasks, being truthful and harmless is more important than being helpful.\\n\\nBy **helpful**, we mean that the output should follow the user's intention and help the user solve their task. Some examples of helpful behavior:\\n\\n- Writing in clear language.\\n- Answering the question they meant to ask, even if they mis-asked it.\\n- Being sensitive to internationality (e.g., \\\"football\\\" shouldn't mean American football, and \\\"the president\\\" doesn't necessarily mean the US president).\\n- If an instruction is too confusing, asking for clarification and explaining why the instruction is confusing.\\n- Not giving overly long or rambling answers, or repeating information from the question.\\n- Not assuming extraneous extra context outside of what's given (besides things like facts about the world), unless that's an implied part of the task. For example, if asked to \\\"write a polite email response to this email: email body,\\\" the output shouldn't assume \\\"I can't make it this time, but am free next weekend.\\\" But if asked to \\\"Write an email to Socrates,\\\" then you can safely assume the output can make up a context.\\n\\nBy **truthful**, we mean that the output contains accurate information and doesn't mislead the user. 
Some examples of truthful behavior:\\n\\n- On tasks like summarization, where the output should only use information from input, not making up details that are not part of the input description.\\n- Not producing clearly false information about the world (e.g., making up facts or promoting conspiracies). For example, the output should not state that Hillary Clinton has served time in prison.\\n- Avoid generating misleading information or information with questionable authenticity. For example, if the input asks \\\"Why did Hillary Clinton go to jail?\\\", the output should not say \\\"It's not totally clear,\\\" but rather should refute the premise of the question.\\n\\nBy **harmless**, we mean that the output should not cause physical, psychological, or social harm to people; damage to or loss of equipment or property; damage to the environment; or harm to institutions or resources necessary to human well-being. Some examples of harmless behavior:\\n\\n- Treating other humans with kindness, respect, and consideration; not denigrating members of certain groups or using biased language against a particular group.\\n- Not generating abusive, threatening, or offensive language or promoting violence.\\n- Not writing sexual or violent content if it's not asked for.\\n- Not giving bad real-world advice or promoting illegal activity.\\n\\nEvaluating model outputs may involve making trade-offs between these criteria. 
These trade-offs will depend on the task.\"\"\"" - ] + "299f4ae7c6524b9d8a06f22063d5a92f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fc422a04dbcf4b979fa7569c171ac971", + "placeholder": "​", + "style": "IPY_MODEL_a291ef8013ee445b8f3e36be30e1da69", + "value": "Downloading data: 100%" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "RfZuuRpr07n4" - }, - "source": [ - "## Create records\n", - "\n", - "The next step is to build the records for collecting comparison data. This step typically involves generating responses using one or several instruction-tuned models. \n", - "\n", - ".. tip:: When showing responses from two different models to labelers it's recommended to randomly assign different model responses to `response-1` and `response-2` for each record. Otherwise, labelers might find a pattern and be biased towards a specific model. This is especially relevant for model comparison and evaluation but also applies to comparison data for reward modeling. \n", - "\n", - "In this example, we've already generated a dataset using the instructions from the Dolly curated dataset with the Falcon-7B-instruct model. We will use the original human-written response as `response-1` and a response from Falcon as `response-2`. 
\n", - "\n", - "You can build the records and publish them for labelers as follows:" - ] + "2a9d2460cf024776bb0010ff6f1a5fa7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e00a979fa604747b320018d4d5549ad", + "max": 7401, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5ed78905729747d8a3dfe68590950d73", + "value": 7401 + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "qZUeAT-1OFfz", - "outputId": "2b605fd6-3708-4f8c-d18a-4dc1f75f981e" - }, - "outputs": [], - "source": [ - "# build records from hf dataset\n", - "records = [\n", - " rg.FeedbackRecord(fields={\"instruction\": r[\"prompt\"], \"response-1\": r[\"original_response\"], \"response-2\": r[\"response-2\"]})\n", - " for r in hf_dataset\n", - "]\n", - "\n", - "# create dataset\n", - "dataset = rg.FeedbackDataset(\n", - " fields=fields,\n", - " questions=[question],\n", - " guidelines=guidelines\n", - ")\n", - "\n", - "# add records and publish\n", - "dataset.add_records(records)\n", - "dataset.push_to_argilla(\"comparison-data-falcon\", workspace=\"admin\")\n" - ] + "2b34ee7a285a4dbb8c1001d8162b8e44": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now the dataset is ready for labeling. This is the Feedback UI we have just configured:\n", - "\n", - "\n", - "\"A\n", - "\n" - ] + "324c48fb64fb4021a4acec428fcf53ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "4vUhGWWnufDq" - }, - "source": [ - "Additionally, you can push the dataset to the Hub for reproducibility and reuse. This dataset is available in the Hub, feel free to [read the dataset card](https://huggingface.co/datasets/argilla/comparison-data-falcon-with-feedback) to understand its structure, annotation guidelines, and how to import it." 
- ] + "3aaa39cccc26416f9c7aeab4d50da8c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0kqc8fADoX4Q" - }, - "outputs": [], - "source": [ - "#dataset.push_to_huggingface(\"comparison-data-falcon\")" - ] + "3cc6da03d28447d0a7259c460b017296": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ffc2b422e5284a3180e703cab2ebef1e", + "IPY_MODEL_5151b3406f774c57b66c0651e10edb42", + "IPY_MODEL_b04603b9ced743d991a61e42fc0d8ce6" + ], + "layout": "IPY_MODEL_99f75e3565334a0cad80b0ddf434156c" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "aW40aC5xvhrR" - }, - "source": [ - "## Collect feedback and prepare the dataset\n", - "\n", - "Once the data has been labeled using the Argilla UI, we can retrieve it with the Python SDK and prepare it for training the reward model with TRL.\n", - "\n", - "If you are running this tutorial but haven´t labeled any data points, execute the following cell to retrieve the labeled dataset from the Hugging Face Hub. This dataset already contains ranked responses and can be used for the next steps. 
The dataset is available in the Hub, feel free to [read the dataset card](https://huggingface.co/datasets/argilla/comparison-data-falcon-with-feedback) to understand its structure, annotation guidelines, responses, and how to import it. \n", - "\n", - "If you have labeled some examples jump and execute the second cell. " - ] + "4d451a340eb341ee88002e9ecce1d2a9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 238, - "referenced_widgets": [ - "b4a7efcf448247c586546cee7ae4e853", - "ad4dd7f68c00437e878e2965e08457c8", - "e7b96b86f6df4826b04171eaec7fa2a6", - "56021dd10ea447029b2d64a3954f50a8", - "ff6274caa2cb4346b9862dcbf1846184", - "a681213d4f1f4c0db8cf2be709734067", - 
"626d87ba234a4af9aed53f633cefbd95", - "1c28d5ed9c7a41ad87e7fd83380e05a5", - "dab35d7560824b828e1976691e481301", - "23cccf0f1f2240ecaabb5d6efa7a8bbb", - "f5d4cf0d204f478094976a30309de13e", - "1eca0bb4151543098cfbb98b64191f92", - "299f4ae7c6524b9d8a06f22063d5a92f", - "4ed8d36201904c6cb43d0fcdf7b18f34", - "b9ed9f3bcbfd4403b4c1ea9eb3001543", - "a45c853e3a7c4b65ad3bdae9a67607bc", - "fc422a04dbcf4b979fa7569c171ac971", - "a291ef8013ee445b8f3e36be30e1da69", - "7ba0eca5e7984391b4d32608c85bd236", - "0e746ea7187c4b7382de0e5094d6234a", - "324c48fb64fb4021a4acec428fcf53ac", - "841a58e70b3f4621a4419d2872b3d348", - "3cc6da03d28447d0a7259c460b017296", - "ffc2b422e5284a3180e703cab2ebef1e", - "5151b3406f774c57b66c0651e10edb42", - "b04603b9ced743d991a61e42fc0d8ce6", - "99f75e3565334a0cad80b0ddf434156c", - "e295f279b61f48d683784131a8d533cc", - "254c7bb9621c457182976287a0a5ac66", - "9e60f5105a2f430d9c5f382c54b0610c", - "2b34ee7a285a4dbb8c1001d8162b8e44", - "4d451a340eb341ee88002e9ecce1d2a9", - "8c1c4cac8f0444e4a69bd316efde9080", - "b3dff83de3894147838358514c8ef63c", - "ac4deaed71894ca9b8bdba7e14a34212", - "8ed4b17c0dc4464cb5cc8bcfbc6bc803", - "9a45be3cbddc4d0cbcc3591fe2c2883e", - "51d0e514f6244692b2b1e6dbfcc0a460", - "0432b133dc614d1fa8e2d06edc413302", - "ffe071addb80421dba36b5797a594dbe", - "17ed61c237304d2bb77b80bd4def6466", - "809240690fb44738b6173cc53504bcf1", - "5d3a83515f8944d491fe3305ee01a314", - "52d358e242dd4876a66adbdb89fb03fc", - "dd782a3398d1437f9d3626bdf4b87be4", - "f632551058f744b0b9ad9696397c8d53", - "914b7ab6f21d486fa42be37545ea73d7", - "94903b2e56ef43b9b39ddd576a5a06ee", - "902a2c7722484d4283e5ba0757641a0e", - "23ef7fb2393f45d885d11ada8e5a3534", - "3aaa39cccc26416f9c7aeab4d50da8c5", - "04c0309dc09e46419995addb7238f5ee", - "a9dcbe78ae6641d38775818748dd71c4", - "5c23c54cc6bd4475ba755490afb520db", - "bda482b92a8d4229983e72581154eba9" - ] - }, - "id": "p-0_iwa8IA2y", - "outputId": "abbc3e3a-6053-4a0e-c1a1-f3f5f32d686c" - }, - "outputs": [], - "source": [ - "# if you 
haven't ranked any responses with the UI run this cell\n", - "# otherwise ran the next one\n", - "feedback_dataset = rg.FeedbackDataset.from_huggingface(\"argilla/comparison-data-falcon-with-feedback\")" - ] + "4ed8d36201904c6cb43d0fcdf7b18f34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ba0eca5e7984391b4d32608c85bd236", + "max": 5211692, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0e746ea7187c4b7382de0e5094d6234a", + "value": 5211692 + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "TZGfBzgmvg1d", - "outputId": "7efee8d2-79b2-4555-a489-f8a55fcbec31" - }, - "outputs": [], - "source": [ - "# run this cell if you have ranked the responses in the UI\n", - "feedback_dataset = rg.FeedbackDataset.from_argilla('comparison-data-falcon', workspace=\"admin\")" - ] + "5151b3406f774c57b66c0651e10edb42": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9e60f5105a2f430d9c5f382c54b0610c", + "max": 1, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_2b34ee7a285a4dbb8c1001d8162b8e44", + "value": 1 + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The next step is to prepare the dataset in the standard format for training a reward model. In particular, we want to select the chosen and rejected responses from the user feedback. We do this by creating a `TrainingTask` instance for reward modeling using a function that returns chosen-rejected tuples." - ] + "51d0e514f6244692b2b1e6dbfcc0a460": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 537 - }, - "id": "cll7j10swNT2", - "outputId": "0e4974e6-74b1-41be-c54c-92bca9732ddc" - }, - "outputs": [], - "source": [ - "from 
typing import Any, Dict\n", - "from argilla.feedback import TrainingTask\n", - "from collections import Counter\n", - "\n", - "def formatting_func(sample: Dict[str, Any]):\n", - " # sample[\"choose-best\"] => [{'user_id': None, 'value': 1, 'status': 'submitted'}, ...]\n", - " values = [\n", - " annotation[\"value\"]\n", - " for annotation in sample[\"choose-best\"]\n", - " if annotation[\"status\"] == \"submitted\"\n", - " ]\n", - " \n", - " # We will only focus on the annotated records in the dataset\n", - " if Counter(values).most_common(1) != []:\n", - " \n", - " # values => [1]\n", - " winning_response = Counter(values).most_common(1)[0][0]\n", - " print(Counter(values).most_common(1))\n", - " if winning_response == 1:\n", - " chosen = sample[\"response-1\"]\n", - " rejected = sample[\"response-2\"]\n", - " else:\n", - " chosen = sample[\"response-2\"]\n", - " rejected = sample[\"response-1\"]\n", - " return chosen, rejected\n", - "\n", - "task = TrainingTask.for_reward_modeling(formatting_func=formatting_func)" - ] + "52d358e242dd4876a66adbdb89fb03fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we want, we can observe the resulting dataset by preparing it for training with TRL:" - ] + "56021dd10ea447029b2d64a3954f50a8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_23cccf0f1f2240ecaabb5d6efa7a8bbb", + "placeholder": "​", + "style": "IPY_MODEL_f5d4cf0d204f478094976a30309de13e", + "value": " 1/1 [00:04<00:00, 4.35s/it]" + } }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dataset({\n", - " features: ['chosen', 'rejected'],\n", - " num_rows: 7401\n", - "})" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } + "5c23c54cc6bd4475ba755490afb520db": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5d3a83515f8944d491fe3305ee01a314": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ed78905729747d8a3dfe68590950d73": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "626d87ba234a4af9aed53f633cefbd95": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "660b25b7377a4bd0a2baf03a437dc060": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6e00a979fa604747b320018d4d5549ad": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7ba0eca5e7984391b4d32608c85bd236": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d01e2401fa44199afb31d5dcc443761": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "809240690fb44738b6173cc53504bcf1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": 
"" + } + }, + "841a58e70b3f4621a4419d2872b3d348": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8a6d78da941d429b981d82a1b0d249ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fdd275ff03504203b5dbce2ea732fd44", + "placeholder": "​", + "style": "IPY_MODEL_7d01e2401fa44199afb31d5dcc443761", + "value": " 7378/7401 [00:23<00:00, 318.13 examples/s]" + } + }, + "8b2770c23c424f8698d7f3ca60bf5d9a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "8c1c4cac8f0444e4a69bd316efde9080": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8ed4b17c0dc4464cb5cc8bcfbc6bc803": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "info", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_17ed61c237304d2bb77b80bd4def6466", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_809240690fb44738b6173cc53504bcf1", + "value": 1 + } + }, + "902a2c7722484d4283e5ba0757641a0e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "914b7ab6f21d486fa42be37545ea73d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_04c0309dc09e46419995addb7238f5ee", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a9dcbe78ae6641d38775818748dd71c4", + "value": 1 + } + }, + "94903b2e56ef43b9b39ddd576a5a06ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5c23c54cc6bd4475ba755490afb520db", + "placeholder": "​", + "style": "IPY_MODEL_bda482b92a8d4229983e72581154eba9", + "value": " 1/1 [00:00<00:00, 17.55it/s]" + } + }, + "99f75e3565334a0cad80b0ddf434156c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a45be3cbddc4d0cbcc3591fe2c2883e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_5d3a83515f8944d491fe3305ee01a314", + "placeholder": "​", + "style": "IPY_MODEL_52d358e242dd4876a66adbdb89fb03fc", + "value": " 7401/0 [00:00<00:00, 62827.72 examples/s]" + } + }, + "9a872753cf9a4fde9caa69660e85c896": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9fc9251942db44548dcc9b55312f6963", + "placeholder": "​", + "style": "IPY_MODEL_660b25b7377a4bd0a2baf03a437dc060", + "value": "Map: 100%" + } + }, + "9e60f5105a2f430d9c5f382c54b0610c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + 
"padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9fc9251942db44548dcc9b55312f6963": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a291ef8013ee445b8f3e36be30e1da69": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a45c853e3a7c4b65ad3bdae9a67607bc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a681213d4f1f4c0db8cf2be709734067": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9dcbe78ae6641d38775818748dd71c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ac4deaed71894ca9b8bdba7e14a34212": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0432b133dc614d1fa8e2d06edc413302", + "placeholder": "​", + "style": "IPY_MODEL_ffe071addb80421dba36b5797a594dbe", + "value": "Generating train split: " + } + }, + "ad4dd7f68c00437e878e2965e08457c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_a681213d4f1f4c0db8cf2be709734067", + "placeholder": "​", + "style": "IPY_MODEL_626d87ba234a4af9aed53f633cefbd95", + "value": "Downloading data files: 100%" + } + }, + "b04603b9ced743d991a61e42fc0d8ce6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4d451a340eb341ee88002e9ecce1d2a9", + "placeholder": "​", + "style": "IPY_MODEL_8c1c4cac8f0444e4a69bd316efde9080", + "value": " 1/1 [00:00<00:00, 26.03it/s]" + } + }, + "b3dff83de3894147838358514c8ef63c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ac4deaed71894ca9b8bdba7e14a34212", + "IPY_MODEL_8ed4b17c0dc4464cb5cc8bcfbc6bc803", + "IPY_MODEL_9a45be3cbddc4d0cbcc3591fe2c2883e" ], - "source": [ - "dataset = feedback_dataset.prepare_for_training(framework=\"trl\", task=task)\n", - "dataset" - ] + "layout": "IPY_MODEL_51d0e514f6244692b2b1e6dbfcc0a460" + } }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'chosen': \"Depreciation is the drop in value of an asset due to wear and tear, age and obsolescence (going out of date) as recorded in an organization's financial records.\",\n", - " 'rejected': 'What is Depreciation – 10 Important 
Facts to Know?\\nWhen a business buys a new asset, the purchase price of that asset is depreciated over time to reflect its usage and eventual obsolescence. Depreciation expense can be a tax deductible expense and is usually a non-cash expense reported on a company’s income statement and balance sheet. The amount of depreciation expense a company reports each year is the difference between the original purchase price of the asset and what the current value of that asset might be. Here are 10 important facts to know about depreciation:\\n1. Depreciation is a non-cash expense. It is an expense that is reported in a business’s income statement and balance sheet and not a cash flow expense.\\n2. Depreciation is an accounting standard and it is required to be disclosed in a business’s financial statements.\\n3. The amount of depreciation is usually a tax expense and not a cash expense reported on a company’s income statement'}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } + "b44d62c0bef242fabab85ff6129b42dc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9a872753cf9a4fde9caa69660e85c896", + "IPY_MODEL_2a9d2460cf024776bb0010ff6f1a5fa7", + "IPY_MODEL_8a6d78da941d429b981d82a1b0d249ab" ], - "source": [ - "dataset[0]" - ] + "layout": "IPY_MODEL_8b2770c23c424f8698d7f3ca60bf5d9a" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "UgoIyJanMBYk" - }, - "source": [ - "This dataset is ready to be used as comparison data to train a reward model.\n", - "\n", - "
\n", - "\n", - "Note\n", - "\n", - "The paper Direct Preference Optimization: Your Language Model is Secretly a Reward Model proposes DPO, a promising method for using comparison data directly to model human preference, eliminating the need for a reward model and the RL step. Nevertheless, the comparison data collected in Argilla can be directly used for DPO.\n", - "\n", - "
" - ] + "b4a7efcf448247c586546cee7ae4e853": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ad4dd7f68c00437e878e2965e08457c8", + "IPY_MODEL_e7b96b86f6df4826b04171eaec7fa2a6", + "IPY_MODEL_56021dd10ea447029b2d64a3954f50a8" + ], + "layout": "IPY_MODEL_ff6274caa2cb4346b9862dcbf1846184" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "F8r9_XJi1K9P" - }, - "source": [ - "## Train the reward model with `trl`\n", - "In this step, we will use the `RewardTrainer` from the trl library. To understand this step, we recommend you to [check the trl docs](https://huggingface.co/docs/trl/reward_trainer). \n", - "\n", - "
\n", - "To run this step, you need to rank some examples using the Argilla UI, or run the step above with the load from Hugging Face call: `feedback_dataset = FeedbackDataset.from_huggingface`\n", - "
\n", - "\n", - "To train a Reward Model, you need to choose a base model to fine-tune. In the literature, the base model is typically the supervised fine-tuned model resulting from the instruction-tuning step. In this example, that would mean using the [Falcon-7B-instruct model](https://huggingface.co/tiiuae/falcon-7b-instruct). However, as Reward Models are essentially classifiers you can use a more lightweight backbone model, for this example we will use `distilroberta-base` but feel free to experiment with other models.\n", - "\n", - "The code below fine-tunes a `SequenceClassification` model with our preference dataset. The most interesting part is the `formatting_func` function. This function combines instructions with chosen and rejected responses, creating two new strings. These strings are tokenized, becoming input for a reward model that learns to distinguish between good and bad responses based on these examples. The model will be optimized to assign higher values to preferred responses and lower values to rejected responses.\n" - ] + "b9ed9f3bcbfd4403b4c1ea9eb3001543": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_324c48fb64fb4021a4acec428fcf53ac", + "placeholder": "​", + "style": "IPY_MODEL_841a58e70b3f4621a4419d2872b3d348", + "value": " 5.21M/5.21M [00:01<00:00, 5.76MB/s]" + } }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 516, - "referenced_widgets": [ - "b44d62c0bef242fabab85ff6129b42dc", - "9a872753cf9a4fde9caa69660e85c896", - 
"2a9d2460cf024776bb0010ff6f1a5fa7", - "8a6d78da941d429b981d82a1b0d249ab", - "8b2770c23c424f8698d7f3ca60bf5d9a", - "9fc9251942db44548dcc9b55312f6963", - "660b25b7377a4bd0a2baf03a437dc060", - "6e00a979fa604747b320018d4d5549ad", - "5ed78905729747d8a3dfe68590950d73", - "fdd275ff03504203b5dbce2ea732fd44", - "7d01e2401fa44199afb31d5dcc443761" - ] - }, - "id": "-MtSUam61J_Z", - "outputId": "6680758c-3b9c-444a-e73b-48bbb4c1b4b5" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[08/08/23 16:36:51] INFO     INFO:ArgillaTRLTrainer:{'eval_loss': 0.1626577377319336, 'eval_accuracy':   trl.py:226\n",
-              "                             0.937204591492235, 'eval_runtime': 6.5907, 'eval_samples_per_second':                 \n",
-              "                             224.709, 'eval_steps_per_second': 28.221, 'epoch': 1.0}                               \n",
-              "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08/08/23 16:36:51]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m INFO:ArgillaTRLTrainer:\u001b[1m{\u001b[0m\u001b[32m'eval_loss'\u001b[0m: \u001b[1;36m0.1626577377319336\u001b[0m, \u001b[32m'eval_accuracy'\u001b[0m: \u001b]8;id=234053;file://C:\\code\\argilla\\src\\argilla\\client\\feedback\\training\\frameworks\\trl.py\u001b\\\u001b[2mtrl.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=146316;file://C:\\code\\argilla\\src\\argilla\\client\\feedback\\training\\frameworks\\trl.py#226\u001b\\\u001b[2m226\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m0.937204591492235\u001b[0m, \u001b[32m'eval_runtime'\u001b[0m: \u001b[1;36m6.5907\u001b[0m, \u001b[32m'eval_samples_per_second'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m224.709\u001b[0m, \u001b[32m'eval_steps_per_second'\u001b[0m: \u001b[1;36m28.221\u001b[0m, \u001b[32m'epoch'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "bda482b92a8d4229983e72581154eba9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dab35d7560824b828e1976691e481301": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": 
null, + "description_width": "" + } + }, + "dd782a3398d1437f9d3626bdf4b87be4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f632551058f744b0b9ad9696397c8d53", + "IPY_MODEL_914b7ab6f21d486fa42be37545ea73d7", + "IPY_MODEL_94903b2e56ef43b9b39ddd576a5a06ee" ], - "source": [ - "from argilla.feedback import ArgillaTrainer\n", - "\n", - "model_name = \"distilroberta-base\"\n", - "trainer = ArgillaTrainer(\n", - " dataset=feedback_dataset,\n", - " task=task,\n", - " framework=\"trl\",\n", - " model=model_name,\n", - " train_size=0.8,\n", - ")\n", - "trainer.update_config(\n", - " per_device_train_batch_size=16,\n", - " evaluation_strategy=\"steps\",\n", - " logging_steps=200,\n", - ")\n", - "trainer.train(\"./reward_model\")\n" - ] + "layout": "IPY_MODEL_902a2c7722484d4283e5ba0757641a0e" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using the Reward Model\n", - "The resulting model is [fully open-source and available on the Hugging Hub](https://huggingface.co/argilla/roberta-base-reward-model-falcon-dolly).\n", - "\n", - "\n", - "This is how you can use it with your own data:" - ] + "e295f279b61f48d683784131a8d533cc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + 
"align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(\"argilla/roberta-base-reward-model-falcon-dolly\")\n", - "\n", - "model = AutoModelForSequenceClassification.from_pretrained(\"argilla/roberta-base-reward-model-falcon-dolly\")\n", - "\n", - "def get_score(model, tokenizer, prompt, response):\n", - " # Tokenize the input sequences\n", - " inputs = tokenizer.encode_plus(prompt, response, truncation=True, padding=\"max_length\", max_length=512, return_tensors=\"pt\")\n", - "\n", - " # Perform forward pass\n", - " with torch.no_grad():\n", - " outputs = model(**inputs)\n", - "\n", - " # Extract the logits\n", - " logits = outputs.logits\n", - "\n", - " return logits.item()\n", - "\n", - "# Example usage\n", - "prompt = \"What is Depreciation\"\n", - "example_less_pref_response = \"What is Depreciation – 10 Important Facts to Know? When a business buys a new asset, the purchase price of that asset is depreciated over time to reflect its usage and eventual obsolescence. 
Depreciation expense can be a tax deductible expense and is usually a non-cash expense reported on a company’s income statement and balance sheet. The amount of depreciation expense a company reports each year is the difference between the original purchase price of the asset and what the current value of that asset might be. Here are 10 important facts to know about depreciation: 1. Depreciation is a non-cash expense. It is an expense that is reported in a business’s income statement and balance sheet and not a cash flow expense. 2. Depreciation is an accounting standard and it is required to be disclosed in a business’s financial statements. 3. The amount of depreciation is usually a tax expense and not a cash expense reported on a company’s income statement\"\n", - "example_preferred_response = \"Depreciation is the drop in value of an asset due to wear and tear, age and obsolescence (going out of date) as recorded in an organization's financial records.\"\n", - "\n", - "score = get_score(model, tokenizer, prompt, example_less_pref_response)\n", - "print(score)\n", - "# >> -3.915163993835449\n", - "\n", - "score = get_score(model, tokenizer, prompt, example_preferred_response)\n", - "print(score)\n", - "# >> 7.460323333740234" - ] + "e7b96b86f6df4826b04171eaec7fa2a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1c28d5ed9c7a41ad87e7fd83380e05a5", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dab35d7560824b828e1976691e481301", + "value": 1 + } }, - { - "attachments": 
{}, - "cell_type": "markdown", - "metadata": { - "id": "cxy90ZY4MBYk" - }, - "source": [ - "## Summary\n", - "\n", - "In this tutorial, we learned how to create a comparison dataset by ranking responses from the Dolly dataset and Falcon. With this dataset, we learned how to train a reward model using the `trl` framework." - ] + "f5d4cf0d204f478094976a30309de13e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "BeKXm3s3IFr9" - }, - "source": [ - "## Appendix: How to build the dataset with pre-loaded responses" - ] + "f632551058f744b0b9ad9696397c8d53": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_23ef7fb2393f45d885d11ada8e5a3534", + "placeholder": "​", + "style": "IPY_MODEL_3aaa39cccc26416f9c7aeab4d50da8c5", + "value": "100%" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "efyVbRYXIL6H" - }, - "outputs": [], - "source": [ - "picker = [\"response-1\", \"response-2\"]\n", - "\n", - "def get_chosen_and_not_chosen(l):\n", - " # Generate a random index between 0 and length of the list - 1\n", - " chosen_id = random.randint(0, len(l) - 1)\n", - " not_chosen_id = 1 - 
chosen_id # This will be 0 if chosen_id is 1 and vice versa\n", - "\n", - " return l[chosen_id], l[not_chosen_id], chosen_id\n", - "\n", - "records = []\n", - "\n", - "for r in hf_dataset:\n", - " chosen, not_chosen, chosen_id = get_chosen_and_not_chosen(picker)\n", - " chosen_from_falcon, _, _ = get_chosen_and_not_chosen(picker)\n", - " \n", - " record = rg.FeedbackRecord(\n", - " fields={ \"instruction\": r[\"prompt\"], chosen: r[\"original_response\"], not_chosen: r[chosen_from_falcon]},\n", - " responses = [{\"values\": {\"choose-best\": {\"value\": chosen_id+1}}}],\n", - " external_id=r['external_id']\n", - " )\n", - " records.append(record)\n", - "\n", - "# create dataset\n", - "dataset = rg.FeedbackDataset(\n", - " fields=fields,\n", - " questions=[question],\n", - " guidelines=guidelines\n", - ")\n", - "\n", - "# add records and publish\n", - "dataset.add_records(records)\n", - "\n", - "dataset.push_to_huggingface(\"argilla/comparison-data-falcon-with-feedback\")" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - }, - "vscode": { - "interpreter": { - "hash": "2d98cb9bf90a932b5bf8e86e91214497eb0e38eb318595fbd6fbd5460fe92036" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "0432b133dc614d1fa8e2d06edc413302": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "04c0309dc09e46419995addb7238f5ee": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, 
- "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0e746ea7187c4b7382de0e5094d6234a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "17ed61c237304d2bb77b80bd4def6466": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "20px" - } - }, - "1c28d5ed9c7a41ad87e7fd83380e05a5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1eca0bb4151543098cfbb98b64191f92": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_299f4ae7c6524b9d8a06f22063d5a92f", - "IPY_MODEL_4ed8d36201904c6cb43d0fcdf7b18f34", - "IPY_MODEL_b9ed9f3bcbfd4403b4c1ea9eb3001543" - ], - "layout": "IPY_MODEL_a45c853e3a7c4b65ad3bdae9a67607bc" - } - }, - "23cccf0f1f2240ecaabb5d6efa7a8bbb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": 
"1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "23ef7fb2393f45d885d11ada8e5a3534": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": 
null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "254c7bb9621c457182976287a0a5ac66": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "299f4ae7c6524b9d8a06f22063d5a92f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fc422a04dbcf4b979fa7569c171ac971", - "placeholder": "​", - "style": "IPY_MODEL_a291ef8013ee445b8f3e36be30e1da69", - "value": "Downloading data: 100%" - } - }, - "2a9d2460cf024776bb0010ff6f1a5fa7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6e00a979fa604747b320018d4d5549ad", - "max": 7401, - "min": 0, 
- "orientation": "horizontal", - "style": "IPY_MODEL_5ed78905729747d8a3dfe68590950d73", - "value": 7401 - } - }, - "2b34ee7a285a4dbb8c1001d8162b8e44": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "324c48fb64fb4021a4acec428fcf53ac": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3aaa39cccc26416f9c7aeab4d50da8c5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3cc6da03d28447d0a7259c460b017296": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ffc2b422e5284a3180e703cab2ebef1e", - "IPY_MODEL_5151b3406f774c57b66c0651e10edb42", - "IPY_MODEL_b04603b9ced743d991a61e42fc0d8ce6" - ], - "layout": "IPY_MODEL_99f75e3565334a0cad80b0ddf434156c" - } - }, - "4d451a340eb341ee88002e9ecce1d2a9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": 
null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4ed8d36201904c6cb43d0fcdf7b18f34": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7ba0eca5e7984391b4d32608c85bd236", - "max": 5211692, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0e746ea7187c4b7382de0e5094d6234a", - "value": 5211692 - } - }, - "5151b3406f774c57b66c0651e10edb42": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9e60f5105a2f430d9c5f382c54b0610c", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2b34ee7a285a4dbb8c1001d8162b8e44", - "value": 1 - } - }, - "51d0e514f6244692b2b1e6dbfcc0a460": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "52d358e242dd4876a66adbdb89fb03fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "56021dd10ea447029b2d64a3954f50a8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_23cccf0f1f2240ecaabb5d6efa7a8bbb", - "placeholder": 
"​", - "style": "IPY_MODEL_f5d4cf0d204f478094976a30309de13e", - "value": " 1/1 [00:04<00:00, 4.35s/it]" - } - }, - "5c23c54cc6bd4475ba755490afb520db": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5d3a83515f8944d491fe3305ee01a314": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, 
- "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5ed78905729747d8a3dfe68590950d73": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "626d87ba234a4af9aed53f633cefbd95": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "660b25b7377a4bd0a2baf03a437dc060": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - 
"description_width": "" - } - }, - "6e00a979fa604747b320018d4d5549ad": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7ba0eca5e7984391b4d32608c85bd236": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - 
"grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7d01e2401fa44199afb31d5dcc443761": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "809240690fb44738b6173cc53504bcf1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "841a58e70b3f4621a4419d2872b3d348": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8a6d78da941d429b981d82a1b0d249ab": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fdd275ff03504203b5dbce2ea732fd44", - "placeholder": "​", - "style": "IPY_MODEL_7d01e2401fa44199afb31d5dcc443761", - "value": " 7378/7401 [00:23<00:00, 318.13 examples/s]" - } - }, - "8b2770c23c424f8698d7f3ca60bf5d9a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "8c1c4cac8f0444e4a69bd316efde9080": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8ed4b17c0dc4464cb5cc8bcfbc6bc803": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_17ed61c237304d2bb77b80bd4def6466", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_809240690fb44738b6173cc53504bcf1", - "value": 1 - } - }, - "902a2c7722484d4283e5ba0757641a0e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - 
"left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "914b7ab6f21d486fa42be37545ea73d7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_04c0309dc09e46419995addb7238f5ee", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a9dcbe78ae6641d38775818748dd71c4", - "value": 1 - } - }, - "94903b2e56ef43b9b39ddd576a5a06ee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5c23c54cc6bd4475ba755490afb520db", - "placeholder": "​", - "style": "IPY_MODEL_bda482b92a8d4229983e72581154eba9", - "value": " 1/1 [00:00<00:00, 17.55it/s]" - } - }, - "99f75e3565334a0cad80b0ddf434156c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9a45be3cbddc4d0cbcc3591fe2c2883e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5d3a83515f8944d491fe3305ee01a314", - "placeholder": "​", - "style": "IPY_MODEL_52d358e242dd4876a66adbdb89fb03fc", - "value": " 7401/0 [00:00<00:00, 62827.72 examples/s]" - } - }, - "9a872753cf9a4fde9caa69660e85c896": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9fc9251942db44548dcc9b55312f6963", - "placeholder": "​", - "style": "IPY_MODEL_660b25b7377a4bd0a2baf03a437dc060", - "value": "Map: 100%" - } - }, - "9e60f5105a2f430d9c5f382c54b0610c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9fc9251942db44548dcc9b55312f6963": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a291ef8013ee445b8f3e36be30e1da69": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a45c853e3a7c4b65ad3bdae9a67607bc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - 
"grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a681213d4f1f4c0db8cf2be709734067": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a9dcbe78ae6641d38775818748dd71c4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ac4deaed71894ca9b8bdba7e14a34212": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0432b133dc614d1fa8e2d06edc413302", - "placeholder": "​", - "style": "IPY_MODEL_ffe071addb80421dba36b5797a594dbe", - "value": "Generating train split: " - } - }, - "ad4dd7f68c00437e878e2965e08457c8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a681213d4f1f4c0db8cf2be709734067", - "placeholder": "​", - "style": "IPY_MODEL_626d87ba234a4af9aed53f633cefbd95", - "value": "Downloading data files: 100%" - } - }, - "b04603b9ced743d991a61e42fc0d8ce6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4d451a340eb341ee88002e9ecce1d2a9", - "placeholder": "​", - "style": "IPY_MODEL_8c1c4cac8f0444e4a69bd316efde9080", - "value": " 1/1 [00:00<00:00, 26.03it/s]" - } - }, - "b3dff83de3894147838358514c8ef63c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ac4deaed71894ca9b8bdba7e14a34212", - "IPY_MODEL_8ed4b17c0dc4464cb5cc8bcfbc6bc803", - "IPY_MODEL_9a45be3cbddc4d0cbcc3591fe2c2883e" - ], - "layout": "IPY_MODEL_51d0e514f6244692b2b1e6dbfcc0a460" - } - }, - "b44d62c0bef242fabab85ff6129b42dc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9a872753cf9a4fde9caa69660e85c896", - "IPY_MODEL_2a9d2460cf024776bb0010ff6f1a5fa7", - "IPY_MODEL_8a6d78da941d429b981d82a1b0d249ab" - ], - "layout": "IPY_MODEL_8b2770c23c424f8698d7f3ca60bf5d9a" - } - }, - "b4a7efcf448247c586546cee7ae4e853": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ad4dd7f68c00437e878e2965e08457c8", - "IPY_MODEL_e7b96b86f6df4826b04171eaec7fa2a6", - "IPY_MODEL_56021dd10ea447029b2d64a3954f50a8" - ], - "layout": "IPY_MODEL_ff6274caa2cb4346b9862dcbf1846184" - } - }, - "b9ed9f3bcbfd4403b4c1ea9eb3001543": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_324c48fb64fb4021a4acec428fcf53ac", - "placeholder": "​", - "style": "IPY_MODEL_841a58e70b3f4621a4419d2872b3d348", - "value": " 5.21M/5.21M [00:01<00:00, 5.76MB/s]" - } - }, - "bda482b92a8d4229983e72581154eba9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "dab35d7560824b828e1976691e481301": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": 
"" - } - }, - "dd782a3398d1437f9d3626bdf4b87be4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f632551058f744b0b9ad9696397c8d53", - "IPY_MODEL_914b7ab6f21d486fa42be37545ea73d7", - "IPY_MODEL_94903b2e56ef43b9b39ddd576a5a06ee" - ], - "layout": "IPY_MODEL_902a2c7722484d4283e5ba0757641a0e" - } - }, - "e295f279b61f48d683784131a8d533cc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e7b96b86f6df4826b04171eaec7fa2a6": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1c28d5ed9c7a41ad87e7fd83380e05a5", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_dab35d7560824b828e1976691e481301", - "value": 1 - } - }, - "f5d4cf0d204f478094976a30309de13e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f632551058f744b0b9ad9696397c8d53": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_23ef7fb2393f45d885d11ada8e5a3534", - "placeholder": "​", - "style": "IPY_MODEL_3aaa39cccc26416f9c7aeab4d50da8c5", - "value": "100%" - } - }, - "fc422a04dbcf4b979fa7569c171ac971": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fdd275ff03504203b5dbce2ea732fd44": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ff6274caa2cb4346b9862dcbf1846184": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ffc2b422e5284a3180e703cab2ebef1e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - 
"description_tooltip": null, - "layout": "IPY_MODEL_e295f279b61f48d683784131a8d533cc", - "placeholder": "​", - "style": "IPY_MODEL_254c7bb9621c457182976287a0a5ac66", - "value": "Extracting data files: 100%" - } - }, - "ffe071addb80421dba36b5797a594dbe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } + "fc422a04dbcf4b979fa7569c171ac971": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fdd275ff03504203b5dbce2ea732fd44": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ff6274caa2cb4346b9862dcbf1846184": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ffc2b422e5284a3180e703cab2ebef1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e295f279b61f48d683784131a8d533cc", + "placeholder": "​", + "style": "IPY_MODEL_254c7bb9621c457182976287a0a5ac66", + "value": "Extracting data files: 100%" + } + }, + "ffe071addb80421dba36b5797a594dbe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/trainer-feedback-setfit.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/trainer-feedback-setfit.ipynb index 68952d7144..c4bf922c68 100644 --- 
a/docs/_source/tutorials_and_integrations/tutorials/feedback/trainer-feedback-setfit.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/trainer-feedback-setfit.ipynb @@ -99,10 +99,7 @@ "source": [ "# Replace api_url with the url to your local host if using Docker or your HF Spaces url\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=r\"https://.hf.space\",\n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=r\"https://.hf.space\", api_key=\"admin.apikey\")" ] }, { @@ -125,7 +122,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=r\"https://.hf.space\", \n", + "# api_url=r\"https://.hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -148,9 +145,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -289,22 +289,22 @@ "source": [ "dataset = rg.FeedbackDataset(\n", " guidelines=\"Add some guidelines for the annotation team here.\",\n", - " fields=[\n", - " rg.TextField(name=\"text\", title=\"banking topics\")\n", - " ],\n", + " fields=[rg.TextField(name=\"text\", title=\"banking topics\")],\n", " questions=[\n", " rg.MultiLabelQuestion(\n", " name=\"topics\",\n", " title=\"Select the topic(s) of the request\",\n", - " labels=data.info.features['label'].names, #these are the original labels present in the dataset\n", - " visible_labels=10\n", + " labels=data.info.features[\n", + " \"label\"\n", + " ].names, # these are the original labels present in the dataset\n", + " visible_labels=10,\n", " ),\n", " rg.LabelQuestion(\n", " name=\"sentiment\",\n", " title=\"What is the sentiment of the message?\",\n", - " labels=[\"positive\", \"neutral\", \"negative\"]\n", - " )\n", - " ]\n", + " labels=[\"positive\", \"neutral\", \"negative\"],\n", + " ),\n", + " ],\n", ")" ] }, @@ -335,12 +335,10 @@ "outputs": [], "source": [ "task_mapping_topics = TrainingTaskMapping.for_text_classification(\n", - " text=dataset.field_by_name(\"text\"),\n", - " label=dataset.question_by_name(\"topics\")\n", + " text=dataset.field_by_name(\"text\"), label=dataset.question_by_name(\"topics\")\n", ")\n", "task_mapping_sentiment = TrainingTaskMapping.for_text_classification(\n", - " text=dataset.field_by_name(\"text\"),\n", - " label=dataset.question_by_name(\"sentiment\")\n", + " text=dataset.field_by_name(\"text\"), label=dataset.question_by_name(\"sentiment\")\n", ")" ] }, @@ -368,7 +366,9 @@ "source": [ "from transformers import pipeline\n", "\n", - "sentiment_classifier = pipeline(model=\"cardiffnlp/twitter-roberta-base-sentiment-latest\")" + "sentiment_classifier = pipeline(\n", + " model=\"cardiffnlp/twitter-roberta-base-sentiment-latest\"\n", + ")" ] }, { @@ -394,17 +394,17 @@ "\n", "records = [\n", " rg.FeedbackRecord(\n", - " 
fields={\"text\": record['text']},\n", + " fields={\"text\": record[\"text\"]},\n", " responses=[\n", " {\n", " \"values\": {\n", " # Get the sentiment from a pretrained transformer model\n", " \"sentiment\": {\"value\": get_sentiment(record[\"text\"])},\n", " # Add the topics as a list\n", - " \"topics\": {\"value\": [topic_int2str(record[\"label\"])]}\n", + " \"topics\": {\"value\": [topic_int2str(record[\"label\"])]},\n", " }\n", " }\n", - " ]\n", + " ],\n", " )\n", " for record in data.select(record_indices)\n", "]" diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-fastfit-agreement.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-fastfit-agreement.ipynb index b1e4203f3e..49cd9d4d6f 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-fastfit-agreement.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-fastfit-agreement.ipynb @@ -151,11 +151,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -178,7 +174,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -201,9 +197,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " 
print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -264,8 +263,8 @@ "source": [ "# Get the 151 classes and prepare the conversion dictionaries\n", "labels = [label for label in hf_dataset.features[\"intent\"].names if label]\n", - "label2id = {label:id for id, label in enumerate(labels)}\n", - "id2label = {id:label for label, id in label2id.items()}\n", + "label2id = {label: id for id, label in enumerate(labels)}\n", + "id2label = {id: label for label, id in label2id.items()}\n", "len(labels)" ] }, @@ -283,15 +282,15 @@ "outputs": [], "source": [ "# Save the needed data\n", - "splits = ['train', 'val', 'test']\n", + "splits = [\"train\", \"val\", \"test\"]\n", "\n", - "data = {split: {'text': [], 'intent': []} for split in splits}\n", + "data = {split: {\"text\": [], \"intent\": []} for split in splits}\n", "\n", "for split in splits:\n", " for entry in hf_dataset:\n", - " if entry['split'] == split:\n", - " data[split]['text'].append(entry['text'])\n", - " data[split]['intent'].append(id2label[entry['intent']])\n", + " if entry[\"split\"] == split:\n", + " data[split][\"text\"].append(entry[\"text\"])\n", + " data[split][\"intent\"].append(id2label[entry[\"intent\"]])\n", "\n", "# Create the dataset\n", "dataset = DatasetDict({split: Dataset.from_dict(data[split]) for split in data})" @@ -335,10 +334,12 @@ ], "source": [ "# Sample the dataset\n", - "dataset[\"train\"] = sample_dataset(dataset[\"train\"], label_column=\"intent\", num_samples_per_label=10, seed=42)\n", + "dataset[\"train\"] = sample_dataset(\n", + " dataset[\"train\"], label_column=\"intent\", num_samples_per_label=10, seed=42\n", + ")\n", "\n", "# Rename the validation split\n", - "dataset['validation'] = dataset.pop('val')\n", + "dataset[\"validation\"] = dataset.pop(\"val\")\n", "\n", 
"dataset" ] @@ -530,7 +531,9 @@ "source": [ "# Load the model and tokenizer\n", "model = FastFit.from_pretrained(\"intent_fastfit_model\")\n", - "tokenizer = AutoTokenizer.from_pretrained(\"sentence-transformers/paraphrase-mpnet-base-v2\")" + "tokenizer = AutoTokenizer.from_pretrained(\n", + " \"sentence-transformers/paraphrase-mpnet-base-v2\"\n", + ")" ] }, { @@ -574,9 +577,9 @@ " {\n", " \"text\": sample[\"text\"],\n", " \"true_intent\": sample[\"intent\"],\n", - " \"predicted_intent\": classifier(sample[\"text\"])\n", + " \"predicted_intent\": classifier(sample[\"text\"]),\n", " }\n", - " for sample in dataset['validation'].to_list()[:100]\n", + " for sample in dataset[\"validation\"].to_list()[:100]\n", "]" ] }, @@ -686,7 +689,7 @@ " \"value\": sample[\"true_intent\"],\n", " }\n", " },\n", - " \"user_id\": users[0].id\n", + " \"user_id\": users[0].id,\n", " },\n", " {\n", " \"values\": {\n", @@ -694,7 +697,7 @@ " \"value\": random.choice([sample[\"true_intent\"], labels[0]]),\n", " }\n", " },\n", - " \"user_id\": users[1].id\n", + " \"user_id\": users[1].id,\n", " },\n", " {\n", " \"values\": {\n", @@ -702,10 +705,11 @@ " \"value\": sample[\"predicted_intent\"][0][0][\"label\"],\n", " }\n", " },\n", - " \"user_id\": users[2].id\n", - " }\n", - " ]\n", - " ) for sample in predictions\n", + " \"user_id\": users[2].id,\n", + " },\n", + " ],\n", + " )\n", + " for sample in predictions\n", "]\n", "dataset.add_records(records)" ] @@ -759,7 +763,9 @@ "outputs": [], "source": [ "# Retrieve the annotated dataset\n", - "feedback_dataset = rg.FeedbackDataset.from_argilla(name=\"intent_feedback_dataset\", workspace=\"admin\")" + "feedback_dataset = rg.FeedbackDataset.from_argilla(\n", + " name=\"intent_feedback_dataset\", workspace=\"admin\"\n", + ")" ] }, { @@ -772,7 +778,9 @@ "responses_by_annotator = {}\n", "for record in feedback_dataset.records:\n", " if record.responses:\n", - " submitted_responses = [response for response in record.responses if response.status == 
\"submitted\"]\n", + " submitted_responses = [\n", + " response for response in record.responses if response.status == \"submitted\"\n", + " ]\n", " for response in submitted_responses:\n", " print(response)\n", " annotator_id = str(response.user_id)\n", @@ -903,10 +911,10 @@ "source": [ "# Create a heatmap with seaborn and matplotlib\n", "plt.figure(figsize=(7, 5))\n", - "sns.heatmap(confusion_matrix, annot=False, fmt='d', cmap='Blues')\n", - "plt.xlabel('Predicted')\n", - "plt.ylabel('True')\n", - "plt.title('Confusion Matrix')\n", + "sns.heatmap(confusion_matrix, annot=False, fmt=\"d\", cmap=\"Blues\")\n", + "plt.xlabel(\"Predicted\")\n", + "plt.ylabel(\"True\")\n", + "plt.title(\"Confusion Matrix\")\n", "plt.show()\n", "\n", "# plt.savefig('confusion_matrix.png', dpi=300) # save the image" @@ -994,9 +1002,15 @@ "outputs": [], "source": [ "# Compute Cohen's Kappa for each pair of annotators\n", - "annotator1_2 = cohen_kappa_score(responses_by_annotator[str(users[0].id)], responses_by_annotator[str(users[1].id)])\n", - "annotator2_3 = cohen_kappa_score(responses_by_annotator[str(users[1].id)], responses_by_annotator[str(users[2].id)])\n", - "annotator1_3 = cohen_kappa_score(responses_by_annotator[str(users[0].id)], responses_by_annotator[str(users[2].id)])" + "annotator1_2 = cohen_kappa_score(\n", + " responses_by_annotator[str(users[0].id)], responses_by_annotator[str(users[1].id)]\n", + ")\n", + "annotator2_3 = cohen_kappa_score(\n", + " responses_by_annotator[str(users[1].id)], responses_by_annotator[str(users[2].id)]\n", + ")\n", + "annotator1_3 = cohen_kappa_score(\n", + " responses_by_annotator[str(users[0].id)], responses_by_annotator[str(users[2].id)]\n", + ")" ] }, { @@ -1054,9 +1068,9 @@ "for annotator, responses in responses_by_annotator.items():\n", " for i, response in enumerate(responses):\n", " ratings[i, label2id[response]] += 1\n", - " \n", + "\n", "# Compute Fleiss' Kappa\n", - "kappa = fleiss_kappa(ratings, method='fleiss')" + "kappa = 
fleiss_kappa(ratings, method=\"fleiss\")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb index aef509bb49..4814a1c017 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb @@ -107,11 +107,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\",\n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -135,7 +131,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -159,9 +155,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -190,14 +189,18 @@ "outputs": [], "source": [ "dataset = rg.FeedbackDataset(\n", - " fields = [\n", + " fields=[\n", " rg.TextField(name=\"background\"),\n", " rg.TextField(name=\"prompt\"),\n", " rg.TextField(name=\"response\", title=\"Final Response\"),\n", " ],\n", - " questions = [\n", - " rg.LabelQuestion(name=\"quality\", title=\"Is it a Good or Bad response?\", labels=[\"Good\", \"Bad\"])\n", - " ]\n", + " questions=[\n", + " rg.LabelQuestion(\n", + " name=\"quality\",\n", + " title=\"Is it a Good or Bad response?\",\n", + " labels=[\"Good\", \"Bad\"],\n", + " )\n", + " ],\n", ")" ] }, @@ -247,14 +250,15 @@ "source": [ "from typing import Dict, Any\n", "\n", + "\n", "def extract_background_prompt_response(text: str) -> Dict[str, Any]:\n", - " '''Extract the anthropic prompt from a prompt and response pair.'''\n", + " \"\"\"Extract the anthropic prompt from a prompt and response pair.\"\"\"\n", " start_prompt = text.find(\":\")\n", " end_prompt = text.rfind(\":\")\n", " # Background is anything before the first :\n", " background = text[:start_prompt].strip()\n", " # Prompt is anything between the first : (inclusive) and the last : (exclusive)\n", - " prompt = text[start_prompt: end_prompt].strip()\n", + " prompt = text[start_prompt:end_prompt].strip()\n", " # Response is everything after the last : (inclusive)\n", " response = text[end_prompt:].strip()\n", " return {\"background\": background, \"prompt\": prompt, \"response\": response}\n", @@ -286,7 +290,7 @@ " )\n", " for sample in data\n", "]\n", - "dataset.add_records(records)\n" + "dataset.add_records(records)" ] }, { @@ -357,7 +361,9 @@ "import torch\n", "\n", "model_id = \"mistralai/Mistral-7B-v0.1\"\n", - "model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map=\"auto\")\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " model_id, torch_dtype=torch.float16, device_map=\"auto\"\n", + ")\n", "tokenizer = 
AutoTokenizer.from_pretrained(model_id)\n", "tokenizer.pad_token_id = tokenizer.eos_token_id" ] @@ -402,6 +408,7 @@ "\n", "ANNOTATED_ONLY = False\n", "\n", + "\n", "def formatting_func(sample: Dict[str, Any]) -> Iterator[str]:\n", " if ANNOTATED_ONLY:\n", " # Discard if there are no annotations...\n", @@ -410,7 +417,10 @@ "\n", " # or if it is annotated as \"Bad\" or discarded.\n", " first_annotation = sample[\"quality\"][0]\n", - " if first_annotation[\"value\"] == \"Bad\" or first_annotation[\"status\"] == \"discarded\":\n", + " if (\n", + " first_annotation[\"value\"] == \"Bad\"\n", + " or first_annotation[\"status\"] == \"discarded\"\n", + " ):\n", " return\n", "\n", " # Filter out responses that are likely low quality\n", @@ -419,8 +429,10 @@ "\n", " # Add between all prompt-response pairs\n", " prompt = sample[\"prompt\"]\n", - " prompt = prompt.replace(\":\", f\"{tokenizer.eos_token}{tokenizer.bos_token}:\")\n", - " prompt = prompt[prompt.find(\":\"):]\n", + " prompt = prompt.replace(\n", + " \":\", f\"{tokenizer.eos_token}{tokenizer.bos_token}:\"\n", + " )\n", + " prompt = prompt[prompt.find(\":\") :]\n", " # Add response and optionally the background to the full text.\n", " output = prompt + \" \" + sample[\"response\"]\n", " if sample[\"background\"]:\n", @@ -432,6 +444,7 @@ " assert output.count(\"\") + 1 == output.count(\"\")\n", " return output\n", "\n", + "\n", "task = TrainingTask.for_supervised_fine_tuning(formatting_func)" ] }, @@ -574,6 +587,7 @@ "source": [ "from transformers import DataCollatorForSeq2Seq, BatchEncoding\n", "\n", + "\n", "class DataCollatorForSeq2SeqCopyLabels(DataCollatorForSeq2Seq):\n", " def __call__(self, features, return_tensors=None) -> BatchEncoding:\n", " for feature in features:\n", @@ -643,7 +657,15 @@ "source": [ "from typing import Optional\n", "import torch\n", - "from transformers import TrainerCallback, TrainerControl, TrainerState, GenerationConfig, TrainingArguments, PreTrainedModel, PreTrainedTokenizer\n", + 
"from transformers import (\n", + " TrainerCallback,\n", + " TrainerControl,\n", + " TrainerState,\n", + " GenerationConfig,\n", + " TrainingArguments,\n", + " PreTrainedModel,\n", + " PreTrainedTokenizer,\n", + ")\n", "\n", "\n", "class GenerationCallback(TrainerCallback):\n", @@ -651,7 +673,15 @@ " super().__init__()\n", " self.prompt = prompt\n", "\n", - " def on_evaluate(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, model: Optional[PreTrainedModel] = None, tokenizer: Optional[PreTrainedTokenizer] = None, **kwargs):\n", + " def on_evaluate(\n", + " self,\n", + " args: TrainingArguments,\n", + " state: TrainerState,\n", + " control: TrainerControl,\n", + " model: Optional[PreTrainedModel] = None,\n", + " tokenizer: Optional[PreTrainedTokenizer] = None,\n", + " **kwargs,\n", + " ):\n", " # Tokenize the prompt and send it to the right device\n", " inputs = tokenizer(self.prompt, return_tensors=\"pt\").to(model.device)\n", "\n", @@ -667,7 +697,9 @@ " print(tokenizer.batch_decode(outputs, skip_special_tokens=False)[0])\n", "\n", "\n", - "generation_callback = GenerationCallback(\": What were Nelson Mandela's relations with the ANC? :\")" + "generation_callback = GenerationCallback(\n", + " \": What were Nelson Mandela's relations with the ANC? 
:\"\n", + ")" ] }, { @@ -838,8 +870,12 @@ "inputs = tokenizer(text, return_tensors=\"pt\").to(model.device)\n", "\n", "with torch.no_grad():\n", - " outputs = model.generate(input_ids=inputs[\"input_ids\"], max_new_tokens=250, pad_token_id=tokenizer.pad_token_id)\n", - "print(tokenizer.batch_decode(outputs, skip_special_tokens=False)[0])\n" + " outputs = model.generate(\n", + " input_ids=inputs[\"input_ids\"],\n", + " max_new_tokens=250,\n", + " pad_token_id=tokenizer.pad_token_id,\n", + " )\n", + "print(tokenizer.batch_decode(outputs, skip_special_tokens=False)[0])" ] }, { @@ -860,7 +896,11 @@ "inputs = tokenizer(text, return_tensors=\"pt\").to(model.device)\n", "\n", "with torch.no_grad():\n", - " outputs = model.generate(input_ids=inputs[\"input_ids\"], max_new_tokens=250, pad_token_id=tokenizer.pad_token_id)\n", + " outputs = model.generate(\n", + " input_ids=inputs[\"input_ids\"],\n", + " max_new_tokens=250,\n", + " pad_token_id=tokenizer.pad_token_id,\n", + " )\n", "print(tokenizer.batch_decode(outputs, skip_special_tokens=False)[0])" ] }, @@ -952,7 +992,11 @@ "inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", "\n", "with torch.no_grad():\n", - " outputs = model.generate(input_ids=inputs[\"input_ids\"], max_new_tokens=250, pad_token_id=tokenizer.pad_token_id)\n", + " outputs = model.generate(\n", + " input_ids=inputs[\"input_ids\"],\n", + " max_new_tokens=250,\n", + " pad_token_id=tokenizer.pad_token_id,\n", + " )\n", "print(tokenizer.batch_decode(outputs, skip_special_tokens=False)[0])" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-qa-transformers.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-qa-transformers.ipynb index 8e6a494253..3186b4841d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-qa-transformers.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-qa-transformers.ipynb @@ -143,7 +143,7 @@ "# # 
Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -167,9 +167,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -301,10 +304,15 @@ " \"context\": item[\"context\"],\n", " },\n", " suggestions=[\n", - " {\"question_name\": \"answer\",\n", - " \"value\": question_answerer(question=item[\"question\"], context=item[\"context\"])[\"answer\"]},\n", - " ]\n", - " ) for item in dataset_hf\n", + " {\n", + " \"question_name\": \"answer\",\n", + " \"value\": question_answerer(\n", + " question=item[\"question\"], context=item[\"context\"]\n", + " )[\"answer\"],\n", + " },\n", + " ],\n", + " )\n", + " for item in dataset_hf\n", "]" ] }, @@ -339,7 +347,9 @@ "metadata": {}, "outputs": [], "source": [ - "remote_dataset = dataset.push_to_argilla(name=\"demonstration_data_squad\", workspace=\"argilla\") " + "remote_dataset = dataset.push_to_argilla(\n", + " name=\"demonstration_data_squad\", workspace=\"argilla\"\n", + ")" ] }, { @@ -371,7 +381,9 @@ "metadata": {}, "outputs": [], "source": [ - "annotated_dataset = rg.FeedbackDataset.from_argilla(\"demonstration_data_squad\", workspace=\"argilla\")" + "annotated_dataset = rg.FeedbackDataset.from_argilla(\n", + " \"demonstration_data_squad\", workspace=\"argilla\"\n", + ")" ] }, { @@ -398,7 +410,9 @@ ], 
"source": [ "item = annotated_dataset[3]\n", - "print(f\"Question: {item.fields['question']}\\nContext: {item.fields['context']}\\nAnnotated Answer: {item.responses[0].values['answer'].value}\")" + "print(\n", + " f\"Question: {item.fields['question']}\\nContext: {item.fields['context']}\\nAnnotated Answer: {item.responses[0].values['answer'].value}\"\n", + ")" ] }, { @@ -461,6 +475,7 @@ " continue\n", " yield question, context, answer[\"value\"]\n", "\n", + "\n", "task = TrainingTask.for_question_answering(formatting_func=formatting_func)" ] }, @@ -501,13 +516,13 @@ "outputs": [], "source": [ "trainer.update_config(\n", - " learning_rate = 4e-2,\n", - " weight_decay = 0.01,\n", - " max_grad_norm = 1,\n", - " num_train_epochs = 3,\n", - " logging_strategy = \"steps\",\n", - " save_strategy = \"steps\",\n", - " save_steps = 100,\n", + " learning_rate=4e-2,\n", + " weight_decay=0.01,\n", + " max_grad_norm=1,\n", + " num_train_epochs=3,\n", + " logging_strategy=\"steps\",\n", + " save_strategy=\"steps\",\n", + " save_steps=100,\n", ")" ] }, @@ -543,10 +558,7 @@ "outputs": [], "source": [ "qna_pipeline = pipeline(\n", - " \"question-answering\",\n", - " model=\"my_qna_model\",\n", - " tokenizer=tokenizer,\n", - " device=device\n", + " \"question-answering\", model=\"my_qna_model\", tokenizer=tokenizer, device=device\n", ")" ] }, @@ -577,7 +589,10 @@ } ], "source": [ - "qna_pipeline(question=\"For what is Venezuela famous?\", context=\"Venezuela is known for its natural beauty.\")" + "qna_pipeline(\n", + " question=\"For what is Venezuela famous?\",\n", + " context=\"Venezuela is known for its natural beauty.\",\n", + ")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-setfit-absa.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-setfit-absa.ipynb index 14d96f3d46..0c4661e83b 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-setfit-absa.ipynb +++ 
b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-setfit-absa.ipynb @@ -134,11 +134,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"argilla.apikey\",\n", - " workspace=\"argilla\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"argilla.apikey\", workspace=\"argilla\")" ] }, { @@ -161,7 +157,7 @@ "# # Replace api_url with the url to your HF Spaces URL\n", "# # Replace api_key if you configured a custom API key\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"admin.apikey\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", "# )" @@ -184,9 +180,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -229,7 +228,9 @@ ], "source": [ "# Load the dataset\n", - "hf_dataset = load_dataset(\"jakartaresearch/semeval-absa\", \"restaurant\", split=\"train\", trust_remote_code=True)\n", + "hf_dataset = load_dataset(\n", + " \"jakartaresearch/semeval-absa\", \"restaurant\", split=\"train\", trust_remote_code=True\n", + ")\n", "hf_dataset[0]" ] }, @@ -260,8 +261,8 @@ "train_eval_split = hf_dataset_sample.train_test_split(test_size=0.3)\n", "\n", "# Converting the training and development datasets to pandas DataFrames\n", - "train_df = train_eval_split['train'].to_pandas()\n", - "eval_df = train_eval_split['test'].to_pandas()" + "train_df = train_eval_split[\"train\"].to_pandas()\n", + "eval_df = train_eval_split[\"test\"].to_pandas()" ] }, { @@ -284,31 +285,37 @@ "def calculate_ordinal(text, term, from_index):\n", " ordinal = 0\n", " start = 0\n", - " \n", + "\n", " while start < from_index:\n", " found_index = text.find(term, start)\n", " if found_index == -1 or found_index >= from_index:\n", - " break \n", + " break\n", " ordinal += 1\n", " start = found_index + len(term)\n", " return ordinal\n", "\n", + "\n", "# Function to prepare the dataset for training\n", "def prepare_dataset(df):\n", " prepared_data = []\n", " for _, row in df.iterrows():\n", - " text, aspects = row['text'], row['aspects']\n", - " for term, polarity, start_index in zip(aspects['term'], aspects['polarity'], aspects['from']):\n", - " if polarity not in ['positive', 'negative']: # Skip neutral polarity\n", + " text, aspects = row[\"text\"], row[\"aspects\"]\n", + " for term, polarity, start_index in zip(\n", + " aspects[\"term\"], aspects[\"polarity\"], aspects[\"from\"]\n", + " ):\n", + " if polarity not in [\"positive\", \"negative\"]: # Skip neutral polarity\n", " continue\n", - " prepared_data.append({\n", - " \"text\": text,\n", - " \"span\": term,\n", - " \"label\": polarity,\n", - " \"ordinal\": calculate_ordinal(text, term, start_index)\n", - " 
})\n", + " prepared_data.append(\n", + " {\n", + " \"text\": text,\n", + " \"span\": term,\n", + " \"label\": polarity,\n", + " \"ordinal\": calculate_ordinal(text, term, start_index),\n", + " }\n", + " )\n", " return prepared_data\n", "\n", + "\n", "# Helper function to convert a list of dictionaries to a dictionary of lists\n", "def list_dict_to_dict_list(list_dict):\n", " return {key: [dic[key] for dic in list_dict] for key in list_dict[0]}" @@ -479,6 +486,7 @@ "source": [ "# Check your GPU availability\n", "import torch\n", + "\n", "if torch.cuda.is_available():\n", " device = torch.device(\"cuda\")\n", " print(f\"Using {torch.cuda.get_device_name(0)}\")\n", @@ -584,10 +592,7 @@ "outputs": [], "source": [ "# Save the classification models locally or push them to the Hub\n", - "model.save_pretrained(\n", - " \"models/setfit-absa-aspect\", \n", - " \"models/setfit-absa-polarity\"\n", - ")\n", + "model.save_pretrained(\"models/setfit-absa-aspect\", \"models/setfit-absa-polarity\")\n", "\n", "# model.push_to_hub(\n", "# \"[hf-repo]/setfit-absa-aspect\",\n", @@ -648,8 +653,8 @@ "outputs": [], "source": [ "# Define the sentences and labels\n", - "labels = [label for label in test_dataset_sample['label']]\n", - "sentences = [text for text in test_dataset_sample['text']]\n", + "labels = [label for label in test_dataset_sample[\"label\"]]\n", + "sentences = [text for text in test_dataset_sample[\"text\"]]\n", "\n", "# Helper dictionaries\n", "id2label_overall = {0: \"NEG\", 1: \"NEU\", 2: \"POS\"}\n", @@ -692,8 +697,7 @@ "source": [ "# Load the models\n", "model = AbsaModel.from_pretrained(\n", - " \"models/setfit-absa-model-aspect\", \n", - " \"models/setfit-absa-model-polarity\"\n", + " \"models/setfit-absa-model-aspect\", \"models/setfit-absa-model-polarity\"\n", ")\n", "\n", "# Make predictions\n", @@ -759,7 +763,7 @@ " rg.LabelQuestion(\n", " name=\"overall-sentiment\",\n", " title=\"What is the overall sentiment of the text?\",\n", - " labels={\"POS\": 
\"Positive\", \"NEU\":\"Neutral\", \"NEG\": \"Negative\"},\n", + " labels={\"POS\": \"Positive\", \"NEU\": \"Neutral\", \"NEG\": \"Negative\"},\n", " required=True,\n", " ),\n", " rg.SpanQuestion(\n", @@ -767,10 +771,10 @@ " title=\"Highlight the aspects and their polarity in the text:\",\n", " labels={\"POS\", \"NEG\"},\n", " field=\"aspect-based-sentiment-analysis\",\n", - " required=True\n", + " required=True,\n", " ),\n", " ],\n", - " guidelines=\"Please, read the question carefully and try to answer it as accurately as possible.\"\n", + " guidelines=\"Please, read the question carefully and try to answer it as accurately as possible.\",\n", ")\n", "rg_dataset = rg_dataset.push_to_argilla(name=\"absa-dataset\", workspace=\"argilla\")" ] @@ -793,31 +797,41 @@ "# Helper function to find the span indices in the sentence and return a list of SpanValueSchema objects\n", "nlp = spacy.load(\"en_core_web_lg\")\n", "\n", + "\n", "def find_span_indices(sentence, predictions):\n", " doc = nlp(sentence)\n", " found_spans = []\n", - " \n", - " last_found_index = {span['span']: -1 for span in predictions}\n", - " \n", + "\n", + " last_found_index = {span[\"span\"]: -1 for span in predictions}\n", + "\n", " for span_dict in predictions:\n", - " span_text = span_dict['span']\n", + " span_text = span_dict[\"span\"]\n", " found = False\n", "\n", " for i in range(len(doc)):\n", - " window_text = \" \".join(doc[j].text for j in range(i, min(i + len(span_text.split()), len(doc))))\n", - " \n", + " window_text = \" \".join(\n", + " doc[j].text for j in range(i, min(i + len(span_text.split()), len(doc)))\n", + " )\n", + "\n", " if window_text == span_text and i > last_found_index[span_text]:\n", " start_index = doc[i].idx\n", - " end_index = doc[i + len(span_text.split()) - 1].idx + len(doc[i + len(span_text.split()) - 1])\n", - " \n", - " found_spans.append(SpanValueSchema(start=start_index, end=end_index, \n", - " label=id2label_span[span_dict['polarity']]))\n", + " end_index = 
doc[i + len(span_text.split()) - 1].idx + len(\n", + " doc[i + len(span_text.split()) - 1]\n", + " )\n", + "\n", + " found_spans.append(\n", + " SpanValueSchema(\n", + " start=start_index,\n", + " end=end_index,\n", + " label=id2label_span[span_dict[\"polarity\"]],\n", + " )\n", + " )\n", " last_found_index[span_text] = i\n", " found = True\n", " break\n", " if not found:\n", " raise ValueError(f\"Span '{span_text}' not found in the sentence.\")\n", - " \n", + "\n", " return found_spans" ] }, @@ -831,21 +845,19 @@ "records = [\n", " rg.FeedbackRecord(\n", " fields={\n", - " \"text\": sentence, \n", - " \"aspect-based-sentiment-analysis\": sentence, \n", + " \"text\": sentence,\n", + " \"aspect-based-sentiment-analysis\": sentence,\n", " },\n", " responses=[\n", " {\n", " \"values\": {\n", - " \"overall-sentiment\": {\n", - " \"value\": id2label_overall[label]\n", - " },\n", + " \"overall-sentiment\": {\"value\": id2label_overall[label]},\n", " \"aspect-polarity\": {\n", " \"value\": find_span_indices(sentence, prediction),\n", " },\n", " }\n", " }\n", - " ]\n", + " ],\n", " )\n", " for sentence, prediction, label in zip(sentences, predictions, labels)\n", "]\n", @@ -887,7 +899,9 @@ "outputs": [], "source": [ "# Retrieve the annotated dataset and filter the records\n", - "annotated_dataset = rg.FeedbackDataset.from_argilla(name=\"absa-dataset\", workspace=\"argilla\")\n", + "annotated_dataset = rg.FeedbackDataset.from_argilla(\n", + " name=\"absa-dataset\", workspace=\"argilla\"\n", + ")\n", "filtered_dataset = annotated_dataset.filter_by(response_status=\"submitted\")" ] }, @@ -908,6 +922,7 @@ "def get_span_text(text, start, end):\n", " return text[start:end]\n", "\n", + "\n", "def prepare_absa_dataset(records):\n", " data = []\n", " for record in records:\n", @@ -915,16 +930,22 @@ "\n", " for response in record.responses:\n", " overall_sentiment = response.values[\"overall-sentiment\"].value\n", - " \n", + "\n", " for aspect_details in 
response.values[\"aspect-polarity\"].value:\n", - " aspect_text = get_span_text(text, aspect_details.start, aspect_details.end)\n", - " data.append({\n", - " \"text\": text,\n", - " \"span\": aspect_text,\n", - " \"label\": aspect_details.label,\n", - " \"ordinal\": calculate_ordinal(text, aspect_text, aspect_details.start),\n", - " \"overall\": overall_sentiment\n", - " })\n", + " aspect_text = get_span_text(\n", + " text, aspect_details.start, aspect_details.end\n", + " )\n", + " data.append(\n", + " {\n", + " \"text\": text,\n", + " \"span\": aspect_text,\n", + " \"label\": aspect_details.label,\n", + " \"ordinal\": calculate_ordinal(\n", + " text, aspect_text, aspect_details.start\n", + " ),\n", + " \"overall\": overall_sentiment,\n", + " }\n", + " )\n", " return data" ] }, diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_learning_with_setfit.ipynb b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_learning_with_setfit.ipynb index c69d847962..0773720e4d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_learning_with_setfit.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_learning_with_setfit.ipynb @@ -115,15 +115,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -132,6 +129,7 @@ { "cell_type": "code", "execution_count": 
null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -143,7 +141,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -174,6 +172,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -184,14 +183,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -217,7 +220,7 @@ "\n", "unlabelled = rg.DatasetForTextClassification.from_datasets(unlabelled)\n", "\n", - "rg.log(unlabelled, \"imdb_unlabelled\")\n" + "rg.log(unlabelled, \"imdb_unlabelled\")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_text_classification_with_active_learning.ipynb b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_text_classification_with_active_learning.ipynb index dad8b22c35..a6e886e777 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_text_classification_with_active_learning.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/few_shot_text_classification_with_active_learning.ipynb @@ -132,15 +132,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -149,6 +146,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -160,7 +158,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# 
extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -169,6 +167,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -179,14 +178,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -224,12 +227,12 @@ "# Disables the progress bar for notebooks: https://github.com/huggingface/datasets/issues/2651\n", "datasets.logging.get_verbosity = lambda: logging.NOTSET\n", "\n", - "raw_dataset = datasets.load_dataset('ag_news')\n", - "num_classes = np.unique(raw_dataset['train']['label']).shape[0]\n", + "raw_dataset = datasets.load_dataset(\"ag_news\")\n", + "num_classes = np.unique(raw_dataset[\"train\"][\"label\"]).shape[0]\n", "\n", - "print('First 10 training samples:\\n')\n", + "print(\"First 10 training samples:\\n\")\n", "for i in range(10):\n", - " print(raw_dataset['train']['label'][i], ' ', raw_dataset['train']['text'][i])" + " print(raw_dataset[\"train\"][\"label\"][i], \" \", raw_dataset[\"train\"][\"text\"][i])" ] }, { @@ -258,19 +261,19 @@ "import numpy as np\n", "from small_text import TextDataset\n", "\n", - "num_classes = raw_dataset['train'].features['label'].num_classes\n", + "num_classes = raw_dataset[\"train\"].features[\"label\"].num_classes\n", "\n", "target_labels = np.arange(num_classes)\n", "\n", "train = TextDataset.from_arrays(\n", - " raw_dataset['train']['text'], \n", - " np.array(raw_dataset['train']['label']), \n", - " target_labels=target_labels\n", + " 
raw_dataset[\"train\"][\"text\"],\n", + " np.array(raw_dataset[\"train\"][\"label\"]),\n", + " target_labels=target_labels,\n", ")\n", "test = TextDataset.from_arrays(\n", - " raw_dataset['test']['text'], \n", - " np.array(raw_dataset['test']['label']), \n", - " target_labels=target_labels\n", + " raw_dataset[\"test\"][\"text\"],\n", + " np.array(raw_dataset[\"test\"][\"label\"]),\n", + " target_labels=target_labels,\n", ")" ] }, @@ -317,23 +320,20 @@ "logging.getLogger(\"small_text\").setLevel(logging.INFO)\n", "\n", "# Define our classifier\n", - "sentence_transformer_model_name = 'sentence-transformers/paraphrase-mpnet-base-v2'\n", + "sentence_transformer_model_name = \"sentence-transformers/paraphrase-mpnet-base-v2\"\n", "setfit_model_args = SetFitModelArguments(sentence_transformer_model_name)\n", - "clf_factory = SetFitClassificationFactory(\n", - " setfit_model_args, \n", - " num_classes\n", - ")\n", + "clf_factory = SetFitClassificationFactory(setfit_model_args, num_classes)\n", "\n", "# Define our query strategy\n", "query_strategy = BreakingTies()\n", - "setfit_train_kwargs = {'show_progress_bar': False}\n", + "setfit_train_kwargs = {\"show_progress_bar\": False}\n", "\n", "# Use the active learner with a pool containing all unlabeled data\n", "active_learner = PoolBasedActiveLearner(\n", - " clf_factory, \n", - " query_strategy, \n", - " train, \n", - " fit_kwargs={'setfit_train_kwargs': setfit_train_kwargs}\n", + " clf_factory,\n", + " query_strategy,\n", + " train,\n", + " fit_kwargs={\"setfit_train_kwargs\": setfit_train_kwargs},\n", ")" ] }, @@ -363,7 +363,7 @@ "NUM_SAMPLES = 20\n", "\n", "# Randomly draw an initial subset from the data pool\n", - "initial_indices = random_initialization(dataset, NUM_SAMPLES)\n" + "initial_indices = random_initialization(dataset, NUM_SAMPLES)" ] }, { @@ -415,7 +415,7 @@ "]\n", "\n", "# Log the initial records to Argilla\n", - "rg.log(records, DATASET_NAME)\n" + "rg.log(records, DATASET_NAME)" ] }, { @@ -444,6 +444,7 
@@ "LABEL2INT = raw_dataset[\"train\"].features[\"label\"].str2int\n", "ACCURACIES = []\n", "\n", + "\n", "# Set up the active learning loop with the listener decorator\n", "@listener(\n", " dataset=DATASET_NAME,\n", @@ -497,7 +498,7 @@ " ctx.query_params[\"batch_id\"] = new_batch\n", " print(\"Done!\")\n", "\n", - " print(\"Waiting for annotations ...\")\n" + " print(\"Waiting for annotations ...\")" ] }, { @@ -523,7 +524,7 @@ "metadata": {}, "outputs": [], "source": [ - "active_learning_loop.start()\n" + "active_learning_loop.start()" ] }, { @@ -566,7 +567,7 @@ "source": [ "import pandas as pd\n", "\n", - "pd.Series(ACCURACIES).plot(xlabel=\"Iteration\", ylabel=\"Accuracy\")\n" + "pd.Series(ACCURACIES).plot(xlabel=\"Iteration\", ylabel=\"Accuracy\")" ] }, { @@ -585,7 +586,7 @@ "metadata": {}, "outputs": [], "source": [ - "active_learning_loop.stop()\n" + "active_learning_loop.stop()" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_data_with_semantic_search.ipynb b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_data_with_semantic_search.ipynb index 1e73304d76..e92798fdf2 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_data_with_semantic_search.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_data_with_semantic_search.ipynb @@ -117,11 +117,7 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { @@ -145,7 +141,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# 
api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -187,9 +183,12 @@ "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -216,9 +215,9 @@ "\n", "# Encode text field using batched computation\n", "dataset = dataset.map(\n", - " lambda batch: {\"vectors\": encoder.encode(batch[\"text\"])}, \n", - " batch_size=32, \n", - " batched=True\n", + " lambda batch: {\"vectors\": encoder.encode(batch[\"text\"])},\n", + " batch_size=32,\n", + " batched=True,\n", ")\n", "\n", "# Removes the original labels because you'll be labeling from scratch\n", @@ -345,7 +344,16 @@ "\n", "# Our labeling scheme\n", "settings = rg.TextClassificationSettings(\n", - " label_schema=[\"change_details\", \"card\", \"atm\", \"top_up\", \"balance\", \"transfer\", \"exchange_rate\", \"pin\"]\n", + " label_schema=[\n", + " \"change_details\",\n", + " \"card\",\n", + " \"atm\",\n", + " \"top_up\",\n", + " \"balance\",\n", + " \"transfer\",\n", + " \"exchange_rate\",\n", + " \"pin\",\n", + " ]\n", ")\n", "\n", "rg.configure_dataset_settings(name=\"banking77-topics\", settings=settings)\n", diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_errors_cleanlab.ipynb b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_errors_cleanlab.ipynb index 6b90d7ff29..a0e2ea3809 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_errors_cleanlab.ipynb 
+++ b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/label_errors_cleanlab.ipynb @@ -114,15 +114,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -131,6 +128,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -142,7 +140,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -171,11 +169,12 @@ "from sklearn.naive_bayes import MultinomialNB\n", "from sklearn.pipeline import Pipeline\n", "\n", - "from argilla.labeling.text_classification import find_label_errors\n" + "from argilla.labeling.text_classification import find_label_errors" ] }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -186,14 +185,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import 
tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -277,7 +280,7 @@ "classifier = Pipeline([(\"vect\", CountVectorizer()), (\"clf\", MultinomialNB())])\n", "\n", "# Fit the classifier\n", - "classifier.fit(X=ds_train[\"text\"], y=ds_train[\"label\"])\n" + "classifier.fit(X=ds_train[\"text\"], y=ds_train[\"label\"])" ] }, { @@ -411,7 +414,7 @@ "outputs": [], "source": [ "# Uncover label errors in the Argilla web app\n", - "rg.log(records_with_label_error, \"label_errors\")\n" + "rg.log(records_with_label_error, \"label_errors\")" ] }, { @@ -479,7 +482,7 @@ " cv=int(len(ds_train) / len(ds_test)),\n", " method=\"predict_proba\",\n", " n_jobs=-1,\n", - ")\n" + ")" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_ner.ipynb b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_ner.ipynb index 6ad3468ce1..9b869a6fd0 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_ner.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_ner.ipynb @@ -145,15 +145,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will 
also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -162,6 +159,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -173,7 +171,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -222,6 +220,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -232,14 +231,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\"\n", + " )" ] }, { @@ -356,7 +359,6 @@ }, "outputs": [], "source": [ - "\n", "conll2003 = load_dataset(\"conll2003\")" ] }, @@ -533,7 +535,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "sports_results_annotator = FunctionAnnotator(\"sports_results\", sports_results_detector)\n", "sports_match_annotator = FunctionAnnotator(\"sports_match\", sports_match_detector)" ] @@ -619,7 +620,7 @@ " yield start, end, label\n", "\n", " start = None\n", - " end = None\n" + " end = None" ] }, { @@ -683,7 +684,7 @@ "for keyword in title_ending:\n", " func = partial(title_detector, keyword=keyword, reverse=True)\n", " annotator = FunctionAnnotator(keyword + \" (end)\", func)\n", - " rule_based_annotator.add_annotator(annotator)\n" + " rule_based_annotator.add_annotator(annotator)" ] }, { @@ -853,7 +854,7 @@ " )\n", "\n", "\n", - "rg.log(records=spans_logger(dev_docs), name=\"conll_2003_dev_spans\")\n" + "rg.log(records=spans_logger(dev_docs), name=\"conll_2003_dev_spans\")" ] }, { @@ -1090,6 +1091,7 @@ " dev_docs, gold_span_name=\"gold\", gold_labels=[\"ORG\", \"MISC\", \"PER\", \"LOC\", \"O\"]\n", ")\n", "\n", + "\n", "def scores_to_df(scores):\n", " for annotator, label_dict in scores.items():\n", " for label, metrics_dict in label_dict.items():\n", @@ -1325,7 +1327,7 @@ "\n", "pd.DataFrame(\n", " [{k: v for k, v in scores.items() if k in [\"ents_p\", \"ents_r\", \"ents_f\"]}]\n", - ").round(3)\n" + ").round(3)" ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_text_classification_semantic_search.ipynb b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_text_classification_semantic_search.ipynb index 06283385de..140d884ae9 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_text_classification_semantic_search.ipynb +++ 
b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/weak_supervision_text_classification_semantic_search.ipynb @@ -157,15 +157,12 @@ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", "# Replace workspace with the name of your workspace\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"owner.apikey\",\n", - " workspace=\"admin\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"owner.apikey\", workspace=\"admin\")" ] }, { "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", "metadata": {}, "source": [ "If you're running a private Hugging Face Space, you will also need to set the [HF_TOKEN](https://huggingface.co/settings/tokens) as follows:" @@ -174,6 +171,7 @@ { "cell_type": "code", "execution_count": null, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": {}, "outputs": [], "source": [ @@ -185,7 +183,7 @@ "# # Replace api_key if you configured a custom API key\n", "# # Replace workspace with the name of your workspace\n", "# rg.init(\n", - "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\", \n", + "# api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n", "# api_key=\"owner.apikey\",\n", "# workspace=\"admin\",\n", "# extra_headers={\"Authorization\": f\"Bearer {os.environ['HF_TOKEN']}\"},\n", @@ -220,6 +218,7 @@ }, { "cell_type": "markdown", + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": {}, "source": [ "### Enable Telemetry\n", @@ -230,14 +229,18 @@ { "cell_type": "code", "execution_count": null, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": {}, "outputs": [], "source": [ "try:\n", " from argilla.utils.telemetry import tutorial_running\n", + "\n", " tutorial_running()\n", "except ImportError:\n", - " print(\"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. 
Skipping telemetry.\")" + " print(\n", + " \"Telemetry is introduced in Argilla 1.20.0 and not found in the current installation. Skipping telemetry.\"\n", + " )" ] }, { @@ -1656,7 +1659,7 @@ " 0.6,\n", " 0.6,\n", " 0.8,\n", - "]\n" + "]" ] }, { @@ -1788,7 +1791,7 @@ "\n", " return metrics.classification_report(\n", " y_test, predicted, target_names=[k for k in label2int.keys() if k]\n", - " )\n" + " )" ] }, { @@ -1834,7 +1837,7 @@ } ], "source": [ - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -1910,7 +1913,7 @@ " return transitions_df, text\n", "\n", "\n", - "transitions, text = get_transitions(weak_labels, 15)\n" + "transitions, text = get_transitions(weak_labels, 15)" ] }, { @@ -2181,7 +2184,7 @@ } ], "source": [ - "transitions.transpose()\n" + "transitions.transpose()" ] }, { @@ -2213,7 +2216,7 @@ "thresholds = [0.8] * len(rules)\n", "\n", "# As we have already generated the index in our first call, we just need to provide the thresholds.\n", - "weak_labels.extend_matrix(thresholds)\n" + "weak_labels.extend_matrix(thresholds)" ] }, { @@ -2480,7 +2483,7 @@ "summary = summary.rename(columns={\"index\": \"rule\"})\n", "summary = summary.sort_values(by=\"overlaps\", ascending=True)[[\"rule\", \"overlaps\"]]\n", "summary = summary.reset_index()\n", - "summary\n" + "summary" ] }, { @@ -2526,7 +2529,7 @@ "weak_labels.extend_matrix(thresholds)\n", "label_model = Snorkel(weak_labels)\n", "label_model.fit(lr=0.002, n_epochs=10, progress_bar=False)\n", - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -2566,7 +2569,7 @@ " len(weak_labels.annotation()),\n", " )\n", " coverage = sup / n\n", - " return 2 * acc * coverage / (acc + coverage)\n" + " return 2 * acc * coverage / (acc + coverage)" ] }, { @@ -2613,7 +2616,7 @@ "source": [ "import copy\n", "from tqdm.auto import tqdm\n", - "import numpy as np \n", + 
"import numpy as np\n", "\n", "ths_range = np.arange(1, 0.3, -0.1)\n", "n_ths = len(weak_labels.rules)\n", @@ -2627,7 +2630,7 @@ " acc = train_eval_labelmodel(thresholds)\n", " if acc > best_acc:\n", " best_acc = acc\n", - " best_thresholds = thresholds.copy()\n" + " best_thresholds = thresholds.copy()" ] }, { @@ -2655,7 +2658,7 @@ } ], "source": [ - "np.array(best_thresholds)\n" + "np.array(best_thresholds)" ] }, { @@ -2692,7 +2695,7 @@ "weak_labels.extend_matrix(best_thresholds)\n", "label_model = Snorkel(weak_labels)\n", "label_model.fit(lr=0.002, n_epochs=10, progress_bar=False)\n", - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -2748,7 +2751,7 @@ " y=y_test_for_grid_search,\n", " )\n", "\n", - " return accuracy\n" + " return accuracy" ] }, { @@ -2807,7 +2810,7 @@ " acc = train_eval_downstream(thresholds)\n", " if acc > best_acc:\n", " best_acc = acc\n", - " best_thresholds = thresholds.copy()\n" + " best_thresholds = thresholds.copy()" ] }, { @@ -2835,7 +2838,7 @@ } ], "source": [ - "np.array(best_thresholds)\n" + "np.array(best_thresholds)" ] }, { @@ -2872,7 +2875,7 @@ "weak_labels.extend_matrix(best_thresholds)\n", "label_model = Snorkel(weak_labels)\n", "label_model.fit(lr=0.002, n_epochs=10, progress_bar=False)\n", - "print(train_and_evaluate_downstream_model(label_model))\n" + "print(train_and_evaluate_downstream_model(label_model))" ] }, { @@ -3048,7 +3051,7 @@ "ax[0].add_artist(legend1)\n", "\n", "fig.tight_layout()\n", - "plt.savefig(\"extend_weak_labels.png\", facecolor=\"white\", transparent=False)\n" + "plt.savefig(\"extend_weak_labels.png\", facecolor=\"white\", transparent=False)" ] } ], diff --git a/docs/template.ipynb b/docs/template.ipynb index 13d87e9948..d7d79d48d9 100644 --- a/docs/template.ipynb +++ b/docs/template.ipynb @@ -70,9 +70,9 @@ "evalue": "", "output_type": "error", "traceback": [ - "\u001B[1;31mRunning cells with 'argilla' requires ipykernel 
package.\n", - "\u001B[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", - "\u001B[1;31mCommand: 'conda install -n argilla ipykernel --update-deps --force-reinstall'" + "\u001b[1;31mRunning cells with 'argilla' requires ipykernel package.\n", + "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", + "\u001b[1;31mCommand: 'conda install -n argilla ipykernel --update-deps --force-reinstall'" ] } ], @@ -113,10 +113,7 @@ "source": [ "# Replace api_url with the url to your HF Spaces URL if using Spaces\n", "# Replace api_key if you configured a custom API key\n", - "rg.init(\n", - " api_url=\"http://localhost:6900\", \n", - " api_key=\"admin.apikey\"\n", - ")" + "rg.init(api_url=\"http://localhost:6900\", api_key=\"admin.apikey\")" ] }, { diff --git a/examples/custom_field/custom_field.ipynb b/examples/custom_field/custom_field.ipynb index 94d272a7d2..4d469b1d97 100644 --- a/examples/custom_field/custom_field.ipynb +++ b/examples/custom_field/custom_field.ipynb @@ -66,8 +66,15 @@ " ),\n", " ],\n", " questions=[\n", - " rg.RatingQuestion(\"rating\", title=\"How would you rate the conversation?\", required=True, values=[1, 2, 3, 4, 5]),\n", - " rg.TextQuestion(\"improved_chosen\", title=\"Rewrite the chosen conversation\", required=False),\n", + " rg.RatingQuestion(\n", + " \"rating\",\n", + " title=\"How would you rate the conversation?\",\n", + " required=True,\n", + " values=[1, 2, 3, 4, 5],\n", + " ),\n", + " rg.TextQuestion(\n", + " \"improved_chosen\", title=\"Rewrite the chosen conversation\", required=False\n", + " ),\n", " ],\n", ")\n", "\n", @@ -163,7 +170,10 @@ " ],\n", " questions=[\n", " rg.RatingQuestion(\n", - " \"rating\", title=\"How would you rate the conversation?\", required=True, values=[1, 2, 3, 4, 5]\n", + " \"rating\",\n", + " title=\"How would you rate the conversation?\",\n", + " required=True,\n", + " values=[1, 2, 3, 4, 5],\n", " ),\n", " rg.TextQuestion(\n", " 
\"improved_chosen\", title=\"Rewrite the chosen conversation\", required=True\n",