diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py
index a705d23f0..63faa5b7a 100644
--- a/inference/core/workflows/core_steps/loader.py
+++ b/inference/core/workflows/core_steps/loader.py
@@ -334,6 +334,9 @@
 from inference.core.workflows.core_steps.visualizations.ellipse.v1 import (
     EllipseVisualizationBlockV1,
 )
+from inference.core.workflows.core_steps.visualizations.grid.v1 import (
+    GridVisualizationBlockV1,
+)
 from inference.core.workflows.core_steps.visualizations.halo.v1 import (
     HaloVisualizationBlockV1,
 )
@@ -514,6 +517,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
         Florence2BlockV2,
         GoogleGeminiBlockV1,
         GoogleVisionOCRBlockV1,
+        GridVisualizationBlockV1,
         HaloVisualizationBlockV1,
         ImageBlurBlockV1,
         ImageContoursDetectionBlockV1,
diff --git a/inference/core/workflows/core_steps/visualizations/grid/__init__.py b/inference/core/workflows/core_steps/visualizations/grid/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/inference/core/workflows/core_steps/visualizations/grid/v1.py b/inference/core/workflows/core_steps/visualizations/grid/v1.py
new file mode 100644
index 000000000..dff4b67af
--- /dev/null
+++ b/inference/core/workflows/core_steps/visualizations/grid/v1.py
@@ -0,0 +1,202 @@
+import math
+import uuid
+from typing import List, Literal, Optional, Type, Union
+
+import cv2
+import numpy as np
+from pydantic import ConfigDict, Field
+
+from inference.core.cache.lru_cache import LRUCache
+from inference.core.workflows.core_steps.visualizations.common.base import (
+    OUTPUT_IMAGE_KEY,
+)
+from inference.core.workflows.execution_engine.entities.base import (
+    ImageParentMetadata,
+    OutputDefinition,
+    WorkflowImageData,
+)
+from inference.core.workflows.execution_engine.entities.types import (
+    IMAGE_KIND,
+    INTEGER_KIND,
+    LIST_OF_VALUES_KIND,
+    Selector,
+)
+from inference.core.workflows.prototypes.block import (
+    BlockResult,
+    WorkflowBlock,
+    WorkflowBlockManifest,
+)
+
+TYPE: str = "roboflow_core/grid_visualization@v1"
+SHORT_DESCRIPTION = "Shows an array of images in a grid."
+LONG_DESCRIPTION = """
+The `GridVisualization` block displays an array of images in a grid.
+It will automatically resize the images to fit within the specified width
+and height. The first image will be in the top left corner, and the rest
+will be added to the right of the previous image until the row is full.
+""" + + +class GridVisualizationManifest(WorkflowBlockManifest): + type: Literal[f"{TYPE}"] + model_config = ConfigDict( + json_schema_extra={ + "name": "Grid Visualization", + "version": "v1", + "short_description": SHORT_DESCRIPTION, + "long_description": LONG_DESCRIPTION, + "license": "Apache-2.0", + "block_type": "visualization", + "ui_manifest": { + "section": "visualization", + "icon": "far fa-grid", + }, + } + ) + + images: Selector(kind=[LIST_OF_VALUES_KIND]) = Field( # type: ignore + description="Images to visualize", + examples=["$steps.buffer.output"], + ) + + width: Union[int, Selector(kind=[INTEGER_KIND])] = Field( # type: ignore + description="Width of the output image.", + default=2560, + examples=[2560, "$inputs.width"], + ) + + height: Union[int, Selector(kind=[INTEGER_KIND])] = Field( # type: ignore + description="Height of the output image.", + default=1440, + examples=[1440, "$inputs.height"], + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [ + OutputDefinition( + name=OUTPUT_IMAGE_KEY, + kind=[ + IMAGE_KIND, + ], + ), + ] + + @classmethod + def get_execution_engine_compatibility(cls) -> Optional[str]: + return ">=1.3.0,<2.0.0" + + +class GridVisualizationBlockV1(WorkflowBlock): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.prev_input = None + self.prev_output = None + + self.thumbCache = LRUCache() + + @classmethod + def get_manifest(cls) -> Type[WorkflowBlockManifest]: + return GridVisualizationManifest + + def run( + self, images: List[WorkflowImageData], width: int, height: int + ) -> BlockResult: + # use previous result if input hasn't changed + if self.prev_output is not None: + if len(self.prev_input) == len(images) and all( + self.prev_input[i] == images[i] for i in range(len(images)) + ): + return {OUTPUT_IMAGE_KEY: self.prev_output} + + self.thumbCache.set_max_size(len(images) + 1) + output = self.getImageFor(images, width, height) + + self.prev_input = images + self.prev_output = output + + return {OUTPUT_IMAGE_KEY: output} + + def getImageFor( + self, images: List[WorkflowImageData], width: int, height: int + ) -> WorkflowImageData: + if images is None or len(images) == 0: + return self.getEmptyImage(width, height) + else: + np_image = self.createGrid(images, width, height) + return WorkflowImageData.copy_and_replace( + origin_image_data=images[0], numpy_image=np_image + ) + + def getEmptyImage(self, width: int, height: int) -> WorkflowImageData: + return WorkflowImageData( + parent_metadata=ImageParentMetadata(parent_id=str(uuid.uuid4())), + numpy_image=np.zeros((height, width, 3), dtype=np.uint8), + ) + + def createGrid( + self, images: List[WorkflowImageData], width: int, height: int + ) -> WorkflowImageData: + grid_size = math.ceil(math.sqrt(len(images))) + img = np.zeros((height, width, 3), dtype=np.uint8) + + cell_width = width // grid_size + cell_height = height // grid_size + + for r in range(grid_size): + for c in range(grid_size): + index = r * grid_size + c + + if index >= len(images): + break + + if images[index] is None: + continue + + cacheKey = f"{id(images[index])}_{cell_width}_{cell_height}" + if self.thumbCache.get(cacheKey) is None: + self.thumbCache.set( + cacheKey, + self.resizeImage( + images[index].numpy_image, cell_width, cell_height + ), + ) + img_data = self.thumbCache.get(cacheKey) + + img_data_height, img_data_width, _ = img_data.shape + + # place image in cell (centered) + start_x = c * cell_width + (cell_width - img_data_width) // 2 + start_y = r * 
+
+                # Clamp to avoid negative indices
+                start_x = max(start_x, 0)
+                start_y = max(start_y, 0)
+
+                end_x = start_x + img_data_width
+                end_y = start_y + img_data_height
+
+                # Ensure we do not exceed the canvas boundaries
+                end_x = min(end_x, width)
+                end_y = min(end_y, height)
+
+                # If for some reason the image doesn't fit perfectly, we crop it
+                target_height = end_y - start_y
+                target_width = end_x - start_x
+
+                img[start_y:end_y, start_x:end_x] = img_data[
+                    :target_height, :target_width
+                ]
+
+        return img
+
+    def resizeImage(self, img: np.ndarray, width: int, height: int) -> np.ndarray:
+        img_height, img_width, _ = img.shape
+        scale_w = width / img_width
+        scale_h = height / img_height
+        scale = min(scale_w, scale_h)  # choose the scale that fits both dimensions
+
+        new_width = int(img_width * scale)
+        new_height = int(img_height * scale)
+
+        return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
diff --git a/tests/workflows/unit_tests/core_steps/visualizations/test_grid.py b/tests/workflows/unit_tests/core_steps/visualizations/test_grid.py
new file mode 100644
index 000000000..de541b405
--- /dev/null
+++ b/tests/workflows/unit_tests/core_steps/visualizations/test_grid.py
@@ -0,0 +1,97 @@
+import numpy as np
+
+from inference.core.workflows.core_steps.visualizations.grid.v1 import (
+    GridVisualizationBlockV1
+)
+from inference.core.workflows.execution_engine.entities.base import (
+    ImageParentMetadata,
+    WorkflowImageData,
+)
+
+def test_grid_visualization_block_single() -> None:
+    # given
+    block = GridVisualizationBlockV1()
+
+    image = WorkflowImageData(
+        parent_metadata=ImageParentMetadata(parent_id="some"),
+        numpy_image=np.zeros((1000, 1000, 3), dtype=np.uint8),
+    )
+
+    output = block.run(
+        images=[image],
+        width=1000,
+        height=1000
+    )
+
+    assert output is not None
+    assert "image" in output
+    assert hasattr(output.get("image"), "numpy_image")
+
+    # dimensions of output match input
+    assert output.get("image").numpy_image.shape == (1000, 1000, 3)
+    # check that the output is the same as the input
+    assert np.array_equal(
+        output.get("image").numpy_image, np.zeros((1000, 1000, 3), dtype=np.uint8)
+    )
+
+def test_grid_visualization_block_2x2() -> None:
+    # given
+    block = GridVisualizationBlockV1()
+
+    # 1000x1000 black
+    image_1 = WorkflowImageData(
+        parent_metadata=ImageParentMetadata(parent_id="some"),
+        numpy_image=np.zeros((1000, 1000, 3), dtype=np.uint8),
+    )
+    # 1000x1000 white
+    image_2 = WorkflowImageData(
+        parent_metadata=ImageParentMetadata(parent_id="some"),
+        numpy_image=np.array([[[255, 255, 255]] * 1000] * 1000, dtype=np.uint8),
+    )
+    # 1000x1000 red
+    image_3 = WorkflowImageData(
+        parent_metadata=ImageParentMetadata(parent_id="some"),
+        numpy_image=np.array([[[255, 0, 0]] * 1000] * 1000, dtype=np.uint8),
+    )
+    # 1000x1000 green
+    image_4 = WorkflowImageData(
+        parent_metadata=ImageParentMetadata(parent_id="some"),
+        numpy_image=np.array([[[0, 255, 0]] * 1000] * 1000, dtype=np.uint8),
+    )
+
+    output = block.run(
+        images=[image_1, image_2, image_3, image_4],
+        width=400,
+        height=400
+    )
+
+    assert output is not None
+    assert "image" in output
+    assert hasattr(output.get("image"), "numpy_image")
+
+    # dimensions of output match params
+    assert output.get("image").numpy_image.shape == (400, 400, 3)
+
+    # check that each quadrant is the right color
+    # top left: black
+    assert np.array_equal(
+        output.get("image").numpy_image[:200, :200, :],
+        np.zeros((200, 200, 3), dtype=np.uint8)
+    )
+    # top right: white
+    assert np.array_equal(
+        output.get("image").numpy_image[:200, 200:, :],
+        np.array([[[255, 255, 255]] * 200] * 200, dtype=np.uint8)
+    )
+    # bottom left: red
+    assert np.array_equal(
+        output.get("image").numpy_image[200:, :200, :],
+        np.array([[[255, 0, 0]] * 200] * 200, dtype=np.uint8)
+    )
+    # bottom right: green
+    assert np.array_equal(
+        output.get("image").numpy_image[200:, 200:, :],
+        np.array([[[0, 255, 0]] * 200] * 200, dtype=np.uint8)
+    )
+
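For reference, a minimal usage sketch of the new block with an uneven number of frames, written against only the API exercised by the unit tests above (`GridVisualizationBlockV1.run`, `WorkflowImageData`, `ImageParentMetadata`). The 600x300 canvas, the three 200x100 frames, and the `frame_{i}` parent ids are illustrative values chosen here, not anything taken from the patch itself.

```python
import math

import numpy as np

from inference.core.workflows.core_steps.visualizations.grid.v1 import (
    GridVisualizationBlockV1,
)
from inference.core.workflows.execution_engine.entities.base import (
    ImageParentMetadata,
    WorkflowImageData,
)

# Three 200x100 (width x height) frames on a 600x300 canvas:
# grid_size = ceil(sqrt(3)) = 2, so the canvas is split into 300x150 cells
# and the bottom-right cell receives no image.
images = [
    WorkflowImageData(
        parent_metadata=ImageParentMetadata(parent_id=f"frame_{i}"),
        numpy_image=np.full((100, 200, 3), (i + 1) * 60, dtype=np.uint8),
    )
    for i in range(3)
]

block = GridVisualizationBlockV1()
result = block.run(images=images, width=600, height=300)
grid = result["image"].numpy_image

assert grid.shape == (300, 600, 3)
assert math.ceil(math.sqrt(len(images))) == 2

# resizeImage scales each frame by min(300 / 200, 150 / 100) = 1.5, so these
# thumbnails land at exactly 300x150 and fill their cells; frames with a
# different aspect ratio would instead be centered with black borders.
assert np.array_equal(grid[:150, :300], np.full((150, 300, 3), 60, dtype=np.uint8))

# the unused bottom-right cell stays black
assert np.array_equal(grid[150:, 300:], np.zeros((150, 300, 3), dtype=np.uint8))
```

Because `run()` returns the cached `prev_output` when it is called again with the same image list, and thumbnails are reused through the LRU cache keyed on `id(image)` and cell size, repeating this call inside a video loop only re-renders when the inputs actually change.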