From de8918fb4a7a4b65a3dcfd5d2e064212b23dc060 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Wed, 11 Dec 2024 15:39:44 -0600 Subject: [PATCH 01/10] Initial CLIP Block --- inference/core/workflows/core_steps/loader.py | 4 + .../models/foundation/clip/__init__.py | 0 .../core_steps/models/foundation/clip/v1.py | 227 ++++++++++++++++++ .../execution_engine/entities/types.py | 16 ++ 4 files changed, 247 insertions(+) create mode 100644 inference/core/workflows/core_steps/models/foundation/clip/__init__.py create mode 100644 inference/core/workflows/core_steps/models/foundation/clip/v1.py diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py index d72daa36b..a6297eecf 100644 --- a/inference/core/workflows/core_steps/loader.py +++ b/inference/core/workflows/core_steps/loader.py @@ -141,6 +141,9 @@ from inference.core.workflows.core_steps.models.foundation.anthropic_claude.v1 import ( AntropicClaudeBlockV1, ) +from inference.core.workflows.core_steps.models.foundation.clip.v1 import ( + ClipModelBlockV1, +) from inference.core.workflows.core_steps.models.foundation.clip_comparison.v1 import ( ClipComparisonBlockV1, ) @@ -485,6 +488,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]: CircleVisualizationBlockV1, ClipComparisonBlockV1, ClipComparisonBlockV2, + ClipModelBlockV1, CogVLMBlockV1, ColorVisualizationBlockV1, ConvertGrayscaleBlockV1, diff --git a/inference/core/workflows/core_steps/models/foundation/clip/__init__.py b/inference/core/workflows/core_steps/models/foundation/clip/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py new file mode 100644 index 000000000..c3f817171 --- /dev/null +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -0,0 +1,227 @@ +from functools import partial +from typing import List, Literal, Optional, Type, Union + +import numpy as np +from pydantic import ConfigDict, Field + +from inference.core.entities.requests.clip import ClipImageEmbeddingRequest, ClipTextEmbeddingRequest +from inference.core.env import ( + HOSTED_CORE_MODEL_URL, + LOCAL_INFERENCE_API_URL, + WORKFLOWS_REMOTE_API_TARGET, + WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS, +) +from inference.core.managers.base import ModelManager +from inference.core.workflows.core_steps.common.entities import StepExecutionMode +from inference.core.workflows.core_steps.common.utils import ( + load_core_model, + run_in_parallel, +) +from inference.core.workflows.execution_engine.constants import ( + PARENT_ID_KEY, + ROOT_PARENT_ID_KEY, +) +from inference.core.workflows.execution_engine.entities.base import ( + Batch, + OutputDefinition, + WorkflowImageData, +) +from inference.core.workflows.execution_engine.entities.types import ( + EMBEDDING_KIND, + IMAGE_KIND, + STRING_KIND, + ImageInputField, + Selector, +) +from inference.core.workflows.prototypes.block import ( + BlockResult, + WorkflowBlock, + WorkflowBlockManifest, +) +from inference_sdk import InferenceHTTPClient + +LONG_DESCRIPTION = """ +Use a CLIP model to create semantic embeddings of text and images. + +This block accepts an image or string and returns an embedding. +The embedding can be used to compare the similarity between different +images or between images and text. 
+""" + + +class BlockManifest(WorkflowBlockManifest): + model_config = ConfigDict( + json_schema_extra={ + "name": "CLIP Embedding Model", + "version": "v1", + "short_description": "Generate an embedding of an image or string.", + "long_description": LONG_DESCRIPTION, + "license": "MIT", + "block_type": "model", + } + ) + type: Literal["roboflow_core/clip@v1"] + name: str = Field(description="Unique name of step in workflows") + data: Selector(kind=[IMAGE_KIND, STRING_KIND]) = Field( + title="Data", + description="The string or image to generate an embedding for.", + examples=["$inputs.image", "$steps.cropping.crops"] + ) + + version: Union[ + Literal[ + "RN101", + "RN50", + "RN50x16", + "RN50x4", + "RN50x64", + "ViT-B-16", + "ViT-B-32", + "ViT-L-14-336px", + "ViT-L-14", + ], + Selector(kind=[STRING_KIND]), + ] = Field( + default="ViT-B-32", + description="Variant of CLIP model", + examples=["ViT-B-16", "$inputs.variant"], + ) + + @classmethod + def get_parameters_accepting_batches(cls) -> List[str]: + return ["data"] + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [ + OutputDefinition(name="embedding", kind=[EMBEDDING_KIND]) + ] + + @classmethod + def get_execution_engine_compatibility(cls) -> Optional[str]: + return ">=1.3.0,<2.0.0" + + +class ClipModelBlockV1(WorkflowBlock): + + def __init__( + self, + model_manager: ModelManager, + api_key: Optional[str], + step_execution_mode: StepExecutionMode, + ): + self._model_manager = model_manager + self._api_key = api_key + self._step_execution_mode = step_execution_mode + + @classmethod + def get_init_parameters(cls) -> List[str]: + return ["model_manager", "api_key", "step_execution_mode"] + + @classmethod + def get_manifest(cls) -> Type[WorkflowBlockManifest]: + return BlockManifest + + def run( + self, + data: Batch[Union[WorkflowImageData, str]], + version: str, + ) -> BlockResult: + if self._step_execution_mode is StepExecutionMode.LOCAL: + return self.run_locally(data=data, version=version) + elif self._step_execution_mode is StepExecutionMode.REMOTE: + return self.run_remotely(data=data, version=version) + else: + raise ValueError( + f"Unknown step execution mode: {self._step_execution_mode}" + ) + + def run_locally( + self, + data: Batch[Union[WorkflowImageData, str]], + version: str, + ) -> BlockResult: + predictions = [] + if len(data) == 0: + return [] + + if isinstance(data[0], str): + inference_request = ClipTextEmbeddingRequest( + clip_version_id=version, + text=data, + api_key=self._api_key, + ) + + clip_model_id = load_core_model( + model_manager=self._model_manager, + inference_request=inference_request, + core_model="clip", + ) + + predictions = self._model_manager.infer_from_request_sync( + clip_model_id, inference_request + ) + + return [ {"embedding": p} for p in predictions.embeddings ] + else: + inference_request = ClipImageEmbeddingRequest( + clip_version_id=version, + image=[single_image.to_inference_format(numpy_preferred=True) for single_image in data], + api_key=self._api_key, + ) + + clip_model_id = load_core_model( + model_manager=self._model_manager, + inference_request=inference_request, + core_model="clip", + ) + + predictions = self._model_manager.infer_from_request_sync( + clip_model_id, inference_request + ) + + return [ {"embedding": p} for p in predictions.embeddings ] + + def run_remotely( + self, + data: Batch[Union[WorkflowImageData, str]], + version: str, + ) -> BlockResult: + api_url = ( + LOCAL_INFERENCE_API_URL + if WORKFLOWS_REMOTE_API_TARGET != "hosted" + else 
HOSTED_CORE_MODEL_URL + ) + client = InferenceHTTPClient( + api_url=api_url, + api_key=self._api_key, + ) + if WORKFLOWS_REMOTE_API_TARGET == "hosted": + client.select_api_v0() + + tasks = [] + for d in data: + if isinstance(d, str): + tasks.append( + partial( + client.get_clip_text_embeddings, + text=d, + clip_version=version, + ) + ) + else: + tasks.append( + partial( + client.get_clip_image_embeddings, + image=d.base64_image, + clip_version=version, + ) + ) + + predictions = run_in_parallel( + tasks=tasks, + max_workers=WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS, + ) + + return [ {"embedding": p} for p in predictions ] + \ No newline at end of file diff --git a/inference/core/workflows/execution_engine/entities/types.py b/inference/core/workflows/execution_engine/entities/types.py index d70948857..c51e99147 100644 --- a/inference/core/workflows/execution_engine/entities/types.py +++ b/inference/core/workflows/execution_engine/entities/types.py @@ -210,6 +210,22 @@ def __hash__(self) -> int: internal_data_type="List[Any]", ) +EMBEDDING_KIND_DOCS = """ +This kind represents a vector embedding. It is a list of floating point numbers. + +Embeddings are used in various machine learning tasks like clustering, classification, +and similarity search. They are used to represent data in a continuous, low-dimensional space. + +Typically, vectors that are close to each other in the embedding space are considered similar. +""" +EMBEDDING_KIND = Kind( + name="embedding", + description="A list of floating point numbers representing a vector embedding.", + docs=EMBEDDING_KIND_DOCS, + serialised_data_type="List[float]", + internal_data_type="List[float]", +) + RGB_COLOR_KIND_DOCS = """ This kind represents RGB color as a tuple (R, G, B). From a47c8c082d337ddbec8e3421ce582f3ff8c10027 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Wed, 11 Dec 2024 15:56:00 -0600 Subject: [PATCH 02/10] Make it work for non-batch oriented inputs also --- .../core_steps/models/foundation/clip/v1.py | 18 ++++++++++++++---- .../v1/compiler/graph_constructor.py | 15 --------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index c3f817171..d3d5810de 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -62,7 +62,7 @@ class BlockManifest(WorkflowBlockManifest): ) type: Literal["roboflow_core/clip@v1"] name: str = Field(description="Unique name of step in workflows") - data: Selector(kind=[IMAGE_KIND, STRING_KIND]) = Field( + data: Union[Selector(kind=[IMAGE_KIND, STRING_KIND]), str] = Field( title="Data", description="The string or image to generate an embedding for.", examples=["$inputs.image", "$steps.cropping.crops"] @@ -141,10 +141,14 @@ def run_locally( data: Batch[Union[WorkflowImageData, str]], version: str, ) -> BlockResult: - predictions = [] - if len(data) == 0: - return [] + # if not array, wrap + convert_to_singleton = False + if not isinstance(data, Batch): + data = [data] + convert_to_singleton = True + print("CONVERT TO SINGLETON", convert_to_singleton, type(data), data) + if isinstance(data[0], str): inference_request = ClipTextEmbeddingRequest( clip_version_id=version, @@ -161,6 +165,9 @@ def run_locally( predictions = self._model_manager.infer_from_request_sync( clip_model_id, inference_request ) + + if convert_to_singleton: + return {"embedding": 
predictions.embeddings[0]} return [ {"embedding": p} for p in predictions.embeddings ] else: @@ -180,6 +187,9 @@ def run_locally( clip_model_id, inference_request ) + if convert_to_singleton: + return {"embedding": predictions.embeddings[0]} + return [ {"embedding": p} for p in predictions.embeddings ] def run_remotely( diff --git a/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py b/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py index f1a253b41..fcded3073 100644 --- a/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py +++ b/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py @@ -710,21 +710,6 @@ def denote_data_flow_for_step( f"step inputs are filled with outputs of batch-oriented steps or batch-oriented inputs.", context="workflow_compilation | execution_graph_construction", ) - if ( - step_accepts_batch_input - and batch_input_expected == {True} - and False in actual_input_is_batch - ): - raise ExecutionGraphStructureError( - public_message=f"Detected invalid reference plugged " - f"into property `{property_name}` of step `{node}` - the step " - f"property strictly requires batch-oriented inputs, yet the input selector " - f"holds non-batch oriented input - this indicates the " - f"problem with construction of your Workflow - usually the problem occurs when " - f"non-batch oriented step inputs are filled with outputs of non batch-oriented " - f"steps or non batch-oriented inputs.", - context="workflow_compilation | execution_graph_construction", - ) if not parameters_with_batch_inputs: data_lineage = [] else: From 9cf28712bf18445412b3a064bd089415961a20e2 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 12 Dec 2024 10:40:46 -0600 Subject: [PATCH 03/10] Make style --- .../core_steps/models/foundation/clip/v1.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index d3d5810de..6d68cd07a 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -4,7 +4,10 @@ import numpy as np from pydantic import ConfigDict, Field -from inference.core.entities.requests.clip import ClipImageEmbeddingRequest, ClipTextEmbeddingRequest +from inference.core.entities.requests.clip import ( + ClipImageEmbeddingRequest, + ClipTextEmbeddingRequest, +) from inference.core.env import ( HOSTED_CORE_MODEL_URL, LOCAL_INFERENCE_API_URL, @@ -65,9 +68,9 @@ class BlockManifest(WorkflowBlockManifest): data: Union[Selector(kind=[IMAGE_KIND, STRING_KIND]), str] = Field( title="Data", description="The string or image to generate an embedding for.", - examples=["$inputs.image", "$steps.cropping.crops"] + examples=["$inputs.image", "$steps.cropping.crops"], ) - + version: Union[ Literal[ "RN101", @@ -93,9 +96,7 @@ def get_parameters_accepting_batches(cls) -> List[str]: @classmethod def describe_outputs(cls) -> List[OutputDefinition]: - return [ - OutputDefinition(name="embedding", kind=[EMBEDDING_KIND]) - ] + return [OutputDefinition(name="embedding", kind=[EMBEDDING_KIND])] @classmethod def get_execution_engine_compatibility(cls) -> Optional[str]: @@ -146,7 +147,7 @@ def run_locally( if not isinstance(data, Batch): data = [data] convert_to_singleton = True - + print("CONVERT TO SINGLETON", convert_to_singleton, type(data), data) if isinstance(data[0], str): @@ 
-155,25 +156,28 @@ def run_locally( text=data, api_key=self._api_key, ) - + clip_model_id = load_core_model( model_manager=self._model_manager, inference_request=inference_request, core_model="clip", ) - + predictions = self._model_manager.infer_from_request_sync( clip_model_id, inference_request ) if convert_to_singleton: return {"embedding": predictions.embeddings[0]} - - return [ {"embedding": p} for p in predictions.embeddings ] + + return [{"embedding": p} for p in predictions.embeddings] else: inference_request = ClipImageEmbeddingRequest( clip_version_id=version, - image=[single_image.to_inference_format(numpy_preferred=True) for single_image in data], + image=[ + single_image.to_inference_format(numpy_preferred=True) + for single_image in data + ], api_key=self._api_key, ) @@ -190,7 +194,7 @@ def run_locally( if convert_to_singleton: return {"embedding": predictions.embeddings[0]} - return [ {"embedding": p} for p in predictions.embeddings ] + return [{"embedding": p} for p in predictions.embeddings] def run_remotely( self, @@ -227,11 +231,10 @@ def run_remotely( clip_version=version, ) ) - + predictions = run_in_parallel( tasks=tasks, max_workers=WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS, ) - return [ {"embedding": p} for p in predictions ] - \ No newline at end of file + return [{"embedding": p} for p in predictions] From ace6afc5f6d7ab78b7e7c567eeb14bf24faaacc1 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 12 Dec 2024 10:55:20 -0600 Subject: [PATCH 04/10] Update tests --- ...st_workflow_with_arbitrary_batch_inputs.py | 39 +++++++++---------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_arbitrary_batch_inputs.py b/tests/workflows/integration_tests/execution/test_workflow_with_arbitrary_batch_inputs.py index 51802efef..ddb7c113e 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_arbitrary_batch_inputs.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_arbitrary_batch_inputs.py @@ -794,13 +794,12 @@ def test_workflow_when_non_batch_oriented_step_feeds_batch_oriented_step_operati "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, } - # when - with pytest.raises(ExecutionGraphStructureError): - _ = ExecutionEngine.init( - workflow_definition=WORKFLOW_WITH_NON_BATCH_ORIENTED_STEP_FEEDING_BATCH_ORIENTED_STEP_OPERATING_BATCH_WISE, - init_parameters=workflow_init_parameters, - max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, - ) + # should not throw an exception + _ = ExecutionEngine.init( + workflow_definition=WORKFLOW_WITH_NON_BATCH_ORIENTED_STEP_FEEDING_BATCH_ORIENTED_STEP_OPERATING_BATCH_WISE, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) WORKFLOW_WITH_NON_BATCH_ORIENTED_STEP_FEEDING_MIXED_INPUT_STEP = { @@ -1327,13 +1326,12 @@ def test_workflow_when_non_batch_oriented_step_feeds_compound_strictly_batch_ori "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, } - # then - with pytest.raises(ExecutionGraphStructureError): - _ = ExecutionEngine.init( - workflow_definition=WORKFLOW_WITH_NON_BATCH_ORIENTED_STEP_FEEDING_COMPOUND_STRICTLY_BATCH_ORIENTED_STEP, - init_parameters=workflow_init_parameters, - max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, - ) + # should not throw an exception + _ = ExecutionEngine.init( + workflow_definition=WORKFLOW_WITH_NON_BATCH_ORIENTED_STEP_FEEDING_COMPOUND_STRICTLY_BATCH_ORIENTED_STEP, + 
init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) WORKFLOW_WITH_BATCH_ORIENTED_STEP_FEEDING_COMPOUND_NON_BATCH_ORIENTED_STEP = { @@ -1741,13 +1739,12 @@ def test_workflow_when_non_batch_oriented_input_feeds_compound_strictly_batch_or "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, } - # then - with pytest.raises(ExecutionGraphStructureError): - _ = ExecutionEngine.init( - workflow_definition=WORKFLOW_WITH_NON_BATCH_ORIENTED_INPUT_FEEDING_COMPOUND_STRICTLY_BATCH_ORIENTED_STEP, - init_parameters=workflow_init_parameters, - max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, - ) + # should not throw an exception + _ = ExecutionEngine.init( + workflow_definition=WORKFLOW_WITH_NON_BATCH_ORIENTED_INPUT_FEEDING_COMPOUND_STRICTLY_BATCH_ORIENTED_STEP, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) WORKFLOW_WITH_BATCH_ORIENTED_INPUT_FEEDING_COMPOUND_NON_BATCH_ORIENTED_STEP = { From a044fa82123e59f86a22082598f78cab4236713c Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 12 Dec 2024 14:52:11 -0600 Subject: [PATCH 05/10] Add cosine similarity and unit tests --- inference/core/workflows/core_steps/loader.py | 4 + .../math/cosine_similarity/__init__.py | 0 .../core_steps/math/cosine_similarity/v1.py | 71 +++++++++ .../core_steps/models/foundation/clip/v1.py | 6 - .../core_steps/math/test_cosine_similarity.py | 79 ++++++++++ .../core_steps/models/foundation/test_clip.py | 143 ++++++++++++++++++ 6 files changed, 297 insertions(+), 6 deletions(-) create mode 100644 inference/core/workflows/core_steps/math/cosine_similarity/__init__.py create mode 100644 inference/core/workflows/core_steps/math/cosine_similarity/v1.py create mode 100644 tests/workflows/unit_tests/core_steps/math/test_cosine_similarity.py create mode 100644 tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py index a6297eecf..80d578057 100644 --- a/inference/core/workflows/core_steps/loader.py +++ b/inference/core/workflows/core_steps/loader.py @@ -138,6 +138,9 @@ from inference.core.workflows.core_steps.fusion.dimension_collapse.v1 import ( DimensionCollapseBlockV1, ) +from inference.core.workflows.core_steps.math.cosine_similarity.v1 import ( + CosineSimilarityBlockV1, +) from inference.core.workflows.core_steps.models.foundation.anthropic_claude.v1 import ( AntropicClaudeBlockV1, ) @@ -477,6 +480,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]: PropertyDefinitionBlockV1, DimensionCollapseBlockV1, FirstNonEmptyOrDefaultBlockV1, + CosineSimilarityBlockV1, AntropicClaudeBlockV1, BackgroundColorVisualizationBlockV1, BarcodeDetectorBlockV1, diff --git a/inference/core/workflows/core_steps/math/cosine_similarity/__init__.py b/inference/core/workflows/core_steps/math/cosine_similarity/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/inference/core/workflows/core_steps/math/cosine_similarity/v1.py b/inference/core/workflows/core_steps/math/cosine_similarity/v1.py new file mode 100644 index 000000000..331301f9f --- /dev/null +++ b/inference/core/workflows/core_steps/math/cosine_similarity/v1.py @@ -0,0 +1,71 @@ +from typing import List, Literal, Optional, Type + +from pydantic import ConfigDict, Field + +from inference.core.workflows.execution_engine.entities.base import ( + OutputDefinition, +) +from inference.core.workflows.execution_engine.entities.types import ( + 
EMBEDDING_KIND, + FLOAT_KIND, + Selector, +) +from inference.core.workflows.prototypes.block import ( + BlockResult, + WorkflowBlock, + WorkflowBlockManifest, +) +from inference.core.utils.postprocess import cosine_similarity + +LONG_DESCRIPTION = """ +Calculate the cosine similarity between two embeddings. + +A cosine similarity of 1 means the two embeddings are identical, +while a cosine similarity of 0 means the two embeddings are orthogonal. +Greater values indicate greater similarity. +""" + + +class BlockManifest(WorkflowBlockManifest): + model_config = ConfigDict( + json_schema_extra={ + "name": "Cosine Similarity", + "version": "v1", + "short_description": "Calculate the cosine similarity between two embeddings.", + "long_description": LONG_DESCRIPTION, + "license": "MIT", + "block_type": "model", + } + ) + type: Literal["roboflow_core/cosine_similarity@v1"] + name: str = Field(description="Unique name of step in workflows") + embedding_1: Selector(kind=[EMBEDDING_KIND]) = Field( + description="Embedding 1", + examples=["$steps.clip_image.embedding"], + ) + embedding_2: Selector(kind=[EMBEDDING_KIND]) = Field( + description="Embedding 2", + examples=["$steps.clip_text.embedding"], + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [OutputDefinition(name="similarity", kind=[FLOAT_KIND])] + + @classmethod + def get_execution_engine_compatibility(cls) -> Optional[str]: + return ">=1.3.0,<2.0.0" + + +class CosineSimilarityBlockV1(WorkflowBlock): + @classmethod + def get_manifest(cls) -> Type[WorkflowBlockManifest]: + return BlockManifest + + def run( + self, + embedding_1: List[float], + embedding_2: List[float] + ) -> BlockResult: + similarity = cosine_similarity(embedding_1, embedding_2) + return { "similarity": similarity } \ No newline at end of file diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index 6d68cd07a..cc4b33c9e 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -1,7 +1,6 @@ from functools import partial from typing import List, Literal, Optional, Type, Union -import numpy as np from pydantic import ConfigDict, Field from inference.core.entities.requests.clip import ( @@ -20,10 +19,6 @@ load_core_model, run_in_parallel, ) -from inference.core.workflows.execution_engine.constants import ( - PARENT_ID_KEY, - ROOT_PARENT_ID_KEY, -) from inference.core.workflows.execution_engine.entities.base import ( Batch, OutputDefinition, @@ -33,7 +28,6 @@ EMBEDDING_KIND, IMAGE_KIND, STRING_KIND, - ImageInputField, Selector, ) from inference.core.workflows.prototypes.block import ( diff --git a/tests/workflows/unit_tests/core_steps/math/test_cosine_similarity.py b/tests/workflows/unit_tests/core_steps/math/test_cosine_similarity.py new file mode 100644 index 000000000..8df6059d2 --- /dev/null +++ b/tests/workflows/unit_tests/core_steps/math/test_cosine_similarity.py @@ -0,0 +1,79 @@ +import pytest +from pydantic import ValidationError + +from inference.core.workflows.core_steps.math.cosine_similarity.v1 import BlockManifest, CosineSimilarityBlockV1 + + +def test_manifest_parsing_when_data_is_valid(): + # Given + data = { + "type": "roboflow_core/cosine_similarity@v1", + "name": "cosine_step", + "embedding_1": "$steps.clip_image.embedding", + "embedding_2": "$steps.clip_text.embedding", + } + + # When + result = BlockManifest.model_validate(data) + + # Then + 
assert result.type == "roboflow_core/cosine_similarity@v1" + assert result.name == "cosine_step" + assert result.embedding_1 == "$steps.clip_image.embedding" + assert result.embedding_2 == "$steps.clip_text.embedding" + + +def test_manifest_parsing_when_data_is_invalid(): + # Given invalid data (not a valid embedding selector) + data = { + "type": "roboflow_core/cosine_similarity@v1", + "name": "cosine_step", + "embedding_1": "invalid_data", + "embedding_2": "invalid_data", + } + + # When / Then + with pytest.raises(ValidationError): + BlockManifest.model_validate(data) + + +def test_cosine_similarity_block_run_identical_embeddings(): + # Given identical embeddings + block = CosineSimilarityBlockV1() + embedding_1 = [0.1, 0.3, 0.5] + embedding_2 = [0.1, 0.3, 0.5] + + # When + result = block.run(embedding_1=embedding_1, embedding_2=embedding_2) + + # Then + # Cosine similarity should be close to 1.0 for identical vectors + assert pytest.approx(result["similarity"], 0.0001) == 1.0 + + +def test_cosine_similarity_block_run_orthogonal_embeddings(): + # Given orthogonal embeddings + block = CosineSimilarityBlockV1() + embedding_1 = [1.0, 0.0, 0.0] + embedding_2 = [0.0, 1.0, 0.0] + + # When + result = block.run(embedding_1=embedding_1, embedding_2=embedding_2) + + # Then + # Cosine similarity should be close to 0.0 for orthogonal vectors + assert pytest.approx(result["similarity"], 0.0001) == 0.0 + + +def test_cosine_similarity_block_run_negative_correlation(): + # Given inversely correlated embeddings + block = CosineSimilarityBlockV1() + embedding_1 = [1.0, 1.0, 1.0] + embedding_2 = [-1.0, -1.0, -1.0] + + # When + result = block.run(embedding_1=embedding_1, embedding_2=embedding_2) + + # Then + # Cosine similarity should be close to -1.0 for perfectly negatively correlated vectors + assert pytest.approx(result["similarity"], 0.0001) == -1.0 diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py new file mode 100644 index 000000000..d6f4e9ba0 --- /dev/null +++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py @@ -0,0 +1,143 @@ +import pytest +from unittest.mock import MagicMock, patch +from pydantic import ValidationError + +import numpy as np + +from inference.core.workflows.core_steps.models.foundation.clip.v1 import ( + BlockManifest, + ClipModelBlockV1 +) +from inference.core.workflows.core_steps.common.entities import StepExecutionMode +from inference.core.workflows.execution_engine.entities.base import ( + ImageParentMetadata, + WorkflowImageData +) + + +@pytest.fixture +def mock_model_manager(): + # Mock a model manager that returns a predictable embedding + mock = MagicMock() + mock.infer_from_request_sync.return_value = MagicMock( + embeddings=[[0.1, 0.2, 0.3]] # Sample embedding + ) + return mock + + +@pytest.fixture +def mock_workflow_image_data(): + # Create a mock WorkflowImageData instance + start_image = np.random.randint(0, 255, (1000, 1000, 3), dtype=np.uint8) + return WorkflowImageData( + parent_metadata=ImageParentMetadata(parent_id="some"), + numpy_image=start_image, + ) + + +def test_manifest_parsing_valid(): + data = { + "type": "roboflow_core/clip@v1", + "name": "my_clip_step", + "data": "$inputs.image", + "version": "RN50", + } + + result = BlockManifest.model_validate(data) + assert result.type == "roboflow_core/clip@v1" + assert result.name == "my_clip_step" + assert result.data == "$inputs.image" + assert result.version == "RN50" + + +def 
test_manifest_parsing_invalid_missing_type(): + data = { + "name": "my_clip_step", + "data": "$inputs.image", + "version": "RN50", + } + with pytest.raises(ValidationError): + BlockManifest.model_validate(data) + + +def test_manifest_parsing_invalid_data_type(): + data = { + "type": "roboflow_core/clip@v1", + "name": "my_clip_step", + "data": 123, # invalid type + "version": "RN50", + } + with pytest.raises(ValidationError): + BlockManifest.model_validate(data) + + +def test_run_locally_with_text(mock_model_manager): + block = ClipModelBlockV1( + model_manager=mock_model_manager, + api_key=None, + step_execution_mode=StepExecutionMode.LOCAL, + ) + + # Run with text input + result = block.run(data="Hello world", version="RN50") + + assert isinstance(result, dict) + assert len(result["embedding"]) == 3 + assert result["embedding"] == [0.1, 0.2, 0.3] + mock_model_manager.infer_from_request_sync.assert_called_once() + + +def test_run_locally_with_image(mock_model_manager, mock_workflow_image_data): + block = ClipModelBlockV1( + model_manager=mock_model_manager, + api_key=None, + step_execution_mode=StepExecutionMode.LOCAL, + ) + + result = block.run(data=mock_workflow_image_data, version="RN50") + + assert isinstance(result, dict) + assert len(result["embedding"]) == 3 + assert result["embedding"] == [0.1, 0.2, 0.3] + mock_model_manager.infer_from_request_sync.assert_called_once() + + +@patch("inference.core.workflows.core_steps.models.foundation.clip.v1.InferenceHTTPClient") +def test_run_remotely_with_text(mock_client_cls, mock_model_manager): + # Mock the remote client and its return value + mock_client = MagicMock() + mock_client.get_clip_text_embeddings.return_value = [0.1, 0.2, 0.3] + mock_client_cls.return_value = mock_client + + block = ClipModelBlockV1( + model_manager=mock_model_manager, + api_key=None, + step_execution_mode=StepExecutionMode.REMOTE, + ) + + result = block.run(data=["Hello world"], version="RN50") + + assert isinstance(result, list) + assert len(result) == 1 + assert result[0]["embedding"] == [0.1, 0.2, 0.3] + mock_client.get_clip_text_embeddings.assert_called_once() + + +@patch("inference.core.workflows.core_steps.models.foundation.clip.v1.InferenceHTTPClient") +def test_run_remotely_with_image(mock_client_cls, mock_model_manager, mock_workflow_image_data): + mock_client = MagicMock() + mock_client.get_clip_image_embeddings.return_value = [0.1, 0.2, 0.3] + mock_client_cls.return_value = mock_client + + block = ClipModelBlockV1( + model_manager=mock_model_manager, + api_key=None, + step_execution_mode=StepExecutionMode.REMOTE, + ) + + result = block.run(data=[mock_workflow_image_data], version="RN50") + + assert isinstance(result, list) + assert len(result) == 1 + assert result[0]["embedding"] == [0.1, 0.2, 0.3] + mock_client.get_clip_image_embeddings.assert_called_once() From b7b5d1cbf517cd3baebc7ff22814a8f9b93882ce Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 12 Dec 2024 15:19:24 -0600 Subject: [PATCH 06/10] Add integration tests --- .../execution/test_workflow_with_clip.py | 165 +++++++++++++++++- 1 file changed, 161 insertions(+), 4 deletions(-) diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_clip.py b/tests/workflows/integration_tests/execution/test_workflow_with_clip.py index 944f50d0c..af306cce9 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_clip.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_clip.py @@ -11,6 +11,163 @@ ) CLIP_WORKFLOW = { + "version": "1.0", + "inputs": 
[ + {"type": "InferenceImage", "name": "image_1"}, + {"type": "InferenceImage", "name": "image_2"}, + ], + "steps": [ + { + "type": "roboflow_core/clip@v1", + "name": "embedding_1", + "data": "$inputs.image_1", + "version": "RN50", + }, + { + "type": "roboflow_core/clip@v1", + "name": "embedding_2", + "data": "$inputs.image_2", + "version": "RN50", + }, + { + "type": "roboflow_core/cosine_similarity@v1", + "name": "cosine_similarity", + "embedding_1": "$steps.embedding_1.embedding", + "embedding_2": "$steps.embedding_2.embedding", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "similarity", + "coordinates_system": "own", + "selector": "$steps.cosine_similarity.similarity", + }, + { + "type": "JsonField", + "name": "image_embeddings", + "coordinates_system": "own", + "selector": "$steps.embedding_1.embedding", + }, + ], +} + + +@add_to_workflows_gallery( + category="Basic Workflows", + use_case_title="Workflow with Embeddings", + use_case_description=""" +This Workflow shows how to use an embedding model to compare the +similarity of two images with each other. + """, + workflow_definition=CLIP_WORKFLOW, + workflow_name_in_app="clip", +) +def test_clip_embedding_model( + model_manager: ModelManager, + license_plate_image: np.ndarray, + crowd_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": None, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=CLIP_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={"image_1": license_plate_image, "image_2": crowd_image} + ) + + # then + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output" + assert set(result[0].keys()) == { + "similarity", + "image_embeddings", + }, "Expected all declared outputs to be delivered" + assert ( + pytest.approx(result[0]["similarity"], 0.01) == 0.444 + ), "Expected similarity to be approximately the defined value" + assert ( + len(result[0]["image_embeddings"]) == 1024 + ), "Expected image embedding to be of dimension 1024 for RN50 model" + +CLIP_TEXT_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowParameter", "name": "prompt"}, + ], + "steps": [ + { + "type": "roboflow_core/clip@v1", + "name": "embedding", + "data": "$inputs.prompt", + "version": "RN50", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "text_embeddings", + "coordinates_system": "own", + "selector": "$steps.embedding.embedding", + }, + ], +} + + +def test_clip_text_embedding_model( + model_manager: ModelManager, + license_plate_image: np.ndarray, + crowd_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": None, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=CLIP_TEXT_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={"prompt": "Foo Bar"} + ) + + # then + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 1, "Expected 1 element in the output" + assert set(result[0].keys()) == { + 
"text_embeddings", + }, "Expected all declared outputs to be delivered" + assert ( + len(result[0]["text_embeddings"]) == 1024 + ), "Expected text embedding to be of dimension 1024 for RN50 model" + assert ( + pytest.approx(np.mean(result[0]["text_embeddings"]), 0.0001) == -0.016772 + ), "Expected embedding to have a value similar to during testing" + assert ( + pytest.approx(np.max(result[0]["text_embeddings"]), 0.0001) == 1.65736556 + ), "Expected embedding to have a value similar to during testing" + assert ( + pytest.approx(np.min(result[0]["text_embeddings"]), 0.0001) == -10.109556 + ), "Expected embedding to have a value similar to during testing" + assert ( + pytest.approx(np.std(result[0]["text_embeddings"]), 0.0001) == 0.39733439 + ), "Expected embedding to have a value similar to during testing" + +CLIP_COMPARISON_WORKFLOW = { "version": "1.0", "inputs": [ {"type": "WorkflowImage", "name": "image"}, @@ -36,16 +193,16 @@ @add_to_workflows_gallery( category="Basic Workflows", - use_case_title="Workflow with CLIP model", + use_case_title="Workflow with CLIP Comparison", use_case_description=""" -This is the basic workflow that only contains a single CLIP model block. +This is the basic workflow that only contains a single CLIP Comparison block. Please take a look at how batch-oriented WorkflowImage data is plugged to detection step via input selector (`$inputs.image`) and how non-batch parameters (reference set of texts that the each image in batch will be compared to) is dynamically specified - via `$inputs.reference` selector. """, - workflow_definition=CLIP_WORKFLOW, + workflow_definition=CLIP_COMPARISON_WORKFLOW, workflow_name_in_app="clip", ) def test_clip_workflow_when_minimal_valid_input_provided( @@ -60,7 +217,7 @@ def test_clip_workflow_when_minimal_valid_input_provided( "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, } execution_engine = ExecutionEngine.init( - workflow_definition=CLIP_WORKFLOW, + workflow_definition=CLIP_COMPARISON_WORKFLOW, init_parameters=workflow_init_parameters, max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, ) From 09716608257fa44c45e811868b2b8c3f36c0507f Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 12 Dec 2024 15:32:05 -0600 Subject: [PATCH 07/10] Fix style --- .../core_steps/math/cosine_similarity/v1.py | 21 +++++++++---------- .../core_steps/models/foundation/clip/v1.py | 5 +++++ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/inference/core/workflows/core_steps/math/cosine_similarity/v1.py b/inference/core/workflows/core_steps/math/cosine_similarity/v1.py index 331301f9f..b0c4e032d 100644 --- a/inference/core/workflows/core_steps/math/cosine_similarity/v1.py +++ b/inference/core/workflows/core_steps/math/cosine_similarity/v1.py @@ -2,9 +2,8 @@ from pydantic import ConfigDict, Field -from inference.core.workflows.execution_engine.entities.base import ( - OutputDefinition, -) +from inference.core.utils.postprocess import cosine_similarity +from inference.core.workflows.execution_engine.entities.base import OutputDefinition from inference.core.workflows.execution_engine.entities.types import ( EMBEDDING_KIND, FLOAT_KIND, @@ -15,7 +14,6 @@ WorkflowBlock, WorkflowBlockManifest, ) -from inference.core.utils.postprocess import cosine_similarity LONG_DESCRIPTION = """ Calculate the cosine similarity between two embeddings. 
@@ -34,7 +32,12 @@ class BlockManifest(WorkflowBlockManifest): "short_description": "Calculate the cosine similarity between two embeddings.", "long_description": LONG_DESCRIPTION, "license": "MIT", - "block_type": "model", + "block_type": "math", + "ui_manifest": { + "section": "advanced", + "icon": "far fa-calculator-simple", + "blockPriority": 3, + }, } ) type: Literal["roboflow_core/cosine_similarity@v1"] @@ -62,10 +65,6 @@ class CosineSimilarityBlockV1(WorkflowBlock): def get_manifest(cls) -> Type[WorkflowBlockManifest]: return BlockManifest - def run( - self, - embedding_1: List[float], - embedding_2: List[float] - ) -> BlockResult: + def run(self, embedding_1: List[float], embedding_2: List[float]) -> BlockResult: similarity = cosine_similarity(embedding_1, embedding_2) - return { "similarity": similarity } \ No newline at end of file + return {"similarity": similarity} diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index cc4b33c9e..0896c1962 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -55,6 +55,11 @@ class BlockManifest(WorkflowBlockManifest): "long_description": LONG_DESCRIPTION, "license": "MIT", "block_type": "model", + "ui_manifest": { + "section": "model", + "icon": "far fa-paperclip", + "blockPriority": 2, + }, } ) type: Literal["roboflow_core/clip@v1"] From a893049e67a6739f553d32fbc20ebed26d70fae2 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 12 Dec 2024 15:33:10 -0600 Subject: [PATCH 08/10] Remove stray print --- .../core/workflows/core_steps/models/foundation/clip/v1.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index 0896c1962..615ef5471 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -147,8 +147,6 @@ def run_locally( data = [data] convert_to_singleton = True - print("CONVERT TO SINGLETON", convert_to_singleton, type(data), data) - if isinstance(data[0], str): inference_request = ClipTextEmbeddingRequest( clip_version_id=version, From 1b43e9ad08129f549d5d9d746ae19cccad9a8694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Fri, 13 Dec 2024 17:24:56 +0100 Subject: [PATCH 09/10] Make linters happy --- .../v1/compiler/graph_constructor.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py b/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py index bc61d1d94..f1a253b41 100644 --- a/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py +++ b/inference/core/workflows/execution_engine/v1/compiler/graph_constructor.py @@ -711,18 +711,18 @@ def denote_data_flow_for_step( context="workflow_compilation | execution_graph_construction", ) if ( - step_accepts_batch_input - and batch_input_expected == {True} - and False in actual_input_is_batch + step_accepts_batch_input + and batch_input_expected == {True} + and False in actual_input_is_batch ): raise ExecutionGraphStructureError( public_message=f"Detected invalid reference plugged " - f"into property `{property_name}` of step `{node}` - the step " - f"property strictly requires batch-oriented inputs, yet the 
input selector " - f"holds non-batch oriented input - this indicates the " - f"problem with construction of your Workflow - usually the problem occurs when " - f"non-batch oriented step inputs are filled with outputs of non batch-oriented " - f"steps or non batch-oriented inputs.", + f"into property `{property_name}` of step `{node}` - the step " + f"property strictly requires batch-oriented inputs, yet the input selector " + f"holds non-batch oriented input - this indicates the " + f"problem with construction of your Workflow - usually the problem occurs when " + f"non-batch oriented step inputs are filled with outputs of non batch-oriented " + f"steps or non batch-oriented inputs.", context="workflow_compilation | execution_graph_construction", ) if not parameters_with_batch_inputs: From 55d3a95f2f63b95ab3e849b4ed09cbf9bbb9e7bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Fri, 13 Dec 2024 18:02:04 +0100 Subject: [PATCH 10/10] Fix issues and add tests --- inference/core/version.py | 2 +- .../core_steps/math/cosine_similarity/v1.py | 5 + .../core_steps/models/foundation/clip/v1.py | 76 ++------ .../execution/test_workflow_with_clip.py | 171 +++++++++++++++++- .../core_steps/models/foundation/test_clip.py | 20 +- 5 files changed, 205 insertions(+), 69 deletions(-) diff --git a/inference/core/version.py b/inference/core/version.py index 00d2732ad..4a5f53a40 100644 --- a/inference/core/version.py +++ b/inference/core/version.py @@ -1,4 +1,4 @@ -__version__ = "0.30.0" +__version__ = "0.31.0" if __name__ == "__main__": diff --git a/inference/core/workflows/core_steps/math/cosine_similarity/v1.py b/inference/core/workflows/core_steps/math/cosine_similarity/v1.py index b0c4e032d..fcf5892b3 100644 --- a/inference/core/workflows/core_steps/math/cosine_similarity/v1.py +++ b/inference/core/workflows/core_steps/math/cosine_similarity/v1.py @@ -66,5 +66,10 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]: return BlockManifest def run(self, embedding_1: List[float], embedding_2: List[float]) -> BlockResult: + if len(embedding_1) != len(embedding_2): + raise RuntimeError( + f"roboflow_core/cosine_similarity@v1 block feed with different shape of embeddings. 
" + f"`embedding_1`: (N, {len(embedding_1)}), `embedding_2`: (N, {len(embedding_2)})" + ) similarity = cosine_similarity(embedding_1, embedding_2) return {"similarity": similarity} diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index 615ef5471..9e92b1ce5 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -89,10 +89,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["ViT-B-16", "$inputs.variant"], ) - @classmethod - def get_parameters_accepting_batches(cls) -> List[str]: - return ["data"] - @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [OutputDefinition(name="embedding", kind=[EMBEDDING_KIND])] @@ -124,7 +120,7 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]: def run( self, - data: Batch[Union[WorkflowImageData, str]], + data: Union[WorkflowImageData, str], version: str, ) -> BlockResult: if self._step_execution_mode is StepExecutionMode.LOCAL: @@ -138,64 +134,43 @@ def run( def run_locally( self, - data: Batch[Union[WorkflowImageData, str]], + data: Union[WorkflowImageData, str], version: str, ) -> BlockResult: - # if not array, wrap - convert_to_singleton = False - if not isinstance(data, Batch): - data = [data] - convert_to_singleton = True - - if isinstance(data[0], str): + if isinstance(data, str): inference_request = ClipTextEmbeddingRequest( clip_version_id=version, - text=data, + text=[data], api_key=self._api_key, ) - clip_model_id = load_core_model( model_manager=self._model_manager, inference_request=inference_request, core_model="clip", ) - predictions = self._model_manager.infer_from_request_sync( clip_model_id, inference_request ) - - if convert_to_singleton: - return {"embedding": predictions.embeddings[0]} - - return [{"embedding": p} for p in predictions.embeddings] + return {"embedding": predictions.embeddings[0]} else: inference_request = ClipImageEmbeddingRequest( clip_version_id=version, - image=[ - single_image.to_inference_format(numpy_preferred=True) - for single_image in data - ], + image=[data.to_inference_format(numpy_preferred=True)], api_key=self._api_key, ) - clip_model_id = load_core_model( model_manager=self._model_manager, inference_request=inference_request, core_model="clip", ) - predictions = self._model_manager.infer_from_request_sync( clip_model_id, inference_request ) - - if convert_to_singleton: - return {"embedding": predictions.embeddings[0]} - - return [{"embedding": p} for p in predictions.embeddings] + return {"embedding": predictions.embeddings[0]} def run_remotely( self, - data: Batch[Union[WorkflowImageData, str]], + data: Union[WorkflowImageData, str], version: str, ) -> BlockResult: api_url = ( @@ -210,28 +185,15 @@ def run_remotely( if WORKFLOWS_REMOTE_API_TARGET == "hosted": client.select_api_v0() - tasks = [] - for d in data: - if isinstance(d, str): - tasks.append( - partial( - client.get_clip_text_embeddings, - text=d, - clip_version=version, - ) - ) - else: - tasks.append( - partial( - client.get_clip_image_embeddings, - image=d.base64_image, - clip_version=version, - ) - ) - - predictions = run_in_parallel( - tasks=tasks, - max_workers=WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS, - ) + if isinstance(data, str): + result = client.get_clip_text_embeddings( + text=data, + clip_version=version, + ) + else: + result = client.get_clip_image_embeddings( + inference_input=data.base64_image, + 
clip_version=version, + ) - return [{"embedding": p} for p in predictions] + return {"embedding": result["embeddings"][0]} diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_clip.py b/tests/workflows/integration_tests/execution/test_workflow_with_clip.py index f7055203d..0ffd9b150 100644 --- a/tests/workflows/integration_tests/execution/test_workflow_with_clip.py +++ b/tests/workflows/integration_tests/execution/test_workflow_with_clip.py @@ -4,7 +4,7 @@ from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode -from inference.core.workflows.errors import RuntimeInputError +from inference.core.workflows.errors import RuntimeInputError, StepExecutionError from inference.core.workflows.execution_engine.core import ExecutionEngine from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( add_to_workflows_gallery, @@ -100,6 +100,175 @@ def test_clip_embedding_model( ), "Expected image embedding to be of dimension 1024 for RN50 model" +CLIP_WORKFLOW_COSINE_SIMILARITY_CROSS_DATA_TYPE = { + "version": "1.0", + "inputs": [ + {"type": "InferenceImage", "name": "image_1"}, + {"type": "WorkflowParameter", "name": "reference"}, + ], + "steps": [ + { + "type": "roboflow_core/clip@v1", + "name": "embedding_1", + "data": "$inputs.image_1", + "version": "RN50", + }, + { + "type": "roboflow_core/clip@v1", + "name": "embedding_2", + "data": "$inputs.reference", + "version": "RN50", + }, + { + "type": "roboflow_core/cosine_similarity@v1", + "name": "cosine_similarity", + "embedding_1": "$steps.embedding_1.embedding", + "embedding_2": "$steps.embedding_2.embedding", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "similarity", + "coordinates_system": "own", + "selector": "$steps.cosine_similarity.similarity", + }, + { + "type": "JsonField", + "name": "image_embeddings", + "coordinates_system": "own", + "selector": "$steps.embedding_1.embedding", + }, + ], +} + + +def test_clip_embedding_model_on_batches_of_cross_type_data( + model_manager: ModelManager, + license_plate_image: np.ndarray, + crowd_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": None, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=CLIP_WORKFLOW_COSINE_SIMILARITY_CROSS_DATA_TYPE, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image_1": [license_plate_image, crowd_image], + "reference": "people", + } + ) + + # then + assert isinstance(result, list), "Expected list to be delivered" + assert len(result) == 2, "Expected 2 elements in the output" + assert set(result[0].keys()) == { + "similarity", + "image_embeddings", + }, "Expected all declared outputs to be delivered" + assert set(result[1].keys()) == { + "similarity", + "image_embeddings", + }, "Expected all declared outputs to be delivered" + assert ( + abs(result[0]["similarity"] - 0.13) < 0.02 + ), "Expected similarity to be approximately the defined value" + assert ( + len(result[0]["image_embeddings"]) == 1024 + ), "Expected image embedding to be of dimension 1024 for RN50 model" + assert ( + abs(result[1]["similarity"] - 0.15) < 0.02 + ), "Expected similarity to be 
approximately the defined value" + assert ( + len(result[1]["image_embeddings"]) == 1024 + ), "Expected image embedding to be of dimension 1024 for RN50 model" + + +CLIP_WORKFLOW_COSINE_SIMILARITY_CROSS_DATA_TYPE_WITH_INVALID_LENGTH_OF_EMBEDDINGS = { + "version": "1.0", + "inputs": [ + {"type": "InferenceImage", "name": "image_1"}, + {"type": "WorkflowParameter", "name": "reference"}, + ], + "steps": [ + { + "type": "roboflow_core/clip@v1", + "name": "embedding_1", + "data": "$inputs.image_1", + "version": "RN50", + }, + { + "type": "roboflow_core/clip@v1", + "name": "embedding_2", + "data": "$inputs.reference", + "version": "RN50x4", + }, + { + "type": "roboflow_core/cosine_similarity@v1", + "name": "cosine_similarity", + "embedding_1": "$steps.embedding_1.embedding", + "embedding_2": "$steps.embedding_2.embedding", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "similarity", + "coordinates_system": "own", + "selector": "$steps.cosine_similarity.similarity", + }, + { + "type": "JsonField", + "name": "image_embeddings", + "coordinates_system": "own", + "selector": "$steps.embedding_1.embedding", + }, + ], +} + + +def test_clip_embedding_model_on_batches_of_cross_type_data_with_different_embeddings_length( + model_manager: ModelManager, + license_plate_image: np.ndarray, + crowd_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.api_key": None, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=CLIP_WORKFLOW_COSINE_SIMILARITY_CROSS_DATA_TYPE_WITH_INVALID_LENGTH_OF_EMBEDDINGS, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + with pytest.raises(StepExecutionError) as error: + _ = execution_engine.run( + runtime_parameters={ + "image_1": [license_plate_image, crowd_image], + "reference": "people", + } + ) + + # then + assert ( + "roboflow_core/cosine_similarity@v1 block feed with different shape of embeddings" + in str(error.value) + ) + + CLIP_TEXT_WORKFLOW = { "version": "1.0", "inputs": [ diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py index 873b27b28..6ee576a7f 100644 --- a/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py +++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_clip.py @@ -108,7 +108,9 @@ def test_run_locally_with_image(mock_model_manager, mock_workflow_image_data): def test_run_remotely_with_text(mock_client_cls, mock_model_manager): # Mock the remote client and its return value mock_client = MagicMock() - mock_client.get_clip_text_embeddings.return_value = [0.1, 0.2, 0.3] + mock_client.get_clip_text_embeddings.return_value = { + "embeddings": [[0.1, 0.2, 0.3]] + } mock_client_cls.return_value = mock_client block = ClipModelBlockV1( @@ -117,11 +119,9 @@ def test_run_remotely_with_text(mock_client_cls, mock_model_manager): step_execution_mode=StepExecutionMode.REMOTE, ) - result = block.run(data=["Hello world"], version="RN50") + result = block.run(data="Hello world", version="RN50") - assert isinstance(result, list) - assert len(result) == 1 - assert result[0]["embedding"] == [0.1, 0.2, 0.3] + assert result["embedding"] == [0.1, 0.2, 0.3] mock_client.get_clip_text_embeddings.assert_called_once() @@ -132,7 +132,9 @@ def test_run_remotely_with_image( mock_client_cls, mock_model_manager, 
mock_workflow_image_data ): mock_client = MagicMock() - mock_client.get_clip_image_embeddings.return_value = [0.1, 0.2, 0.3] + mock_client.get_clip_image_embeddings.return_value = { + "embeddings": [[0.1, 0.2, 0.3]] + } mock_client_cls.return_value = mock_client block = ClipModelBlockV1( @@ -141,9 +143,7 @@ def test_run_remotely_with_image( step_execution_mode=StepExecutionMode.REMOTE, ) - result = block.run(data=[mock_workflow_image_data], version="RN50") + result = block.run(data=mock_workflow_image_data, version="RN50") - assert isinstance(result, list) - assert len(result) == 1 - assert result[0]["embedding"] == [0.1, 0.2, 0.3] + assert result["embedding"] == [0.1, 0.2, 0.3] mock_client.get_clip_image_embeddings.assert_called_once()
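
Usage note: the sketch below condenses the integration tests added in this series into a single illustrative workflow that embeds an image and a text prompt with the new roboflow_core/clip@v1 block and scores them with roboflow_core/cosine_similarity@v1. It is an illustrative sketch, not part of the patch: it assumes a ModelManager instance named `model_manager` is already constructed (as in the test fixtures), and the placeholder image and prompt are arbitrary.

    import numpy as np

    from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS
    from inference.core.workflows.core_steps.common.entities import StepExecutionMode
    from inference.core.workflows.execution_engine.core import ExecutionEngine

    WORKFLOW = {
        "version": "1.0",
        "inputs": [
            {"type": "InferenceImage", "name": "image"},
            {"type": "WorkflowParameter", "name": "prompt"},
        ],
        "steps": [
            # Image branch: CLIP embedding of the input image.
            {
                "type": "roboflow_core/clip@v1",
                "name": "image_embedding",
                "data": "$inputs.image",
                "version": "RN50",
            },
            # Text branch: CLIP embedding of the prompt string.
            {
                "type": "roboflow_core/clip@v1",
                "name": "text_embedding",
                "data": "$inputs.prompt",
                "version": "RN50",
            },
            # Compare the two embeddings.
            {
                "type": "roboflow_core/cosine_similarity@v1",
                "name": "similarity",
                "embedding_1": "$steps.image_embedding.embedding",
                "embedding_2": "$steps.text_embedding.embedding",
            },
        ],
        "outputs": [
            {
                "type": "JsonField",
                "name": "similarity",
                "coordinates_system": "own",
                "selector": "$steps.similarity.similarity",
            },
        ],
    }

    execution_engine = ExecutionEngine.init(
        workflow_definition=WORKFLOW,
        init_parameters={
            # `model_manager` is assumed to exist, as in the test fixtures above.
            "workflows_core.model_manager": model_manager,
            "workflows_core.api_key": None,
            "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
        },
        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
    )

    # Placeholder inputs; any HxWx3 uint8 image and any string prompt work here.
    result = execution_engine.run(
        runtime_parameters={
            "image": np.zeros((224, 224, 3), dtype=np.uint8),
            "prompt": "a crowd of people",
        }
    )
    print(result[0]["similarity"])

Because both embeddings come from the same RN50 CLIP variant they have matching dimensionality; mixing variants (for example RN50 with RN50x4) raises the dimension-mismatch error exercised by the integration tests above.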