From 81d1144c2cb1c86903671d8b178b4cfaa0f1706e Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 7 Jan 2025 14:42:48 +0530 Subject: [PATCH 1/3] env standardization for backend --- .../prompt_studio_core_v2/models.py | 10 ++-- .../prompt_studio_helper.py | 10 ++-- backend/utils/file_storage/constants.py | 2 + .../helpers/common_file_helper.py | 47 ------------------- .../helpers/prompt_studio_file_helper.py | 25 ++++++---- 5 files changed, 29 insertions(+), 65 deletions(-) delete mode 100644 backend/utils/file_storage/helpers/common_file_helper.py diff --git a/backend/prompt_studio/prompt_studio_core_v2/models.py b/backend/prompt_studio/prompt_studio_core_v2/models.py index 81c8b535c..c4a71cf15 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/models.py +++ b/backend/prompt_studio/prompt_studio_core_v2/models.py @@ -9,8 +9,9 @@ from django.db.models import QuerySet from file_management.file_management_helper import FileManagerHelper from prompt_studio.prompt_studio_core_v2.constants import DefaultPrompts -from utils.file_storage.constants import FileStorageType -from utils.file_storage.helpers.common_file_helper import FileStorageHelper +from unstract.sdk.file_storage.constants import StorageType +from unstract.sdk.file_storage.env_helper import EnvHelper +from utils.file_storage.constants import FileStorageKeys from utils.file_storage.helpers.prompt_studio_file_helper import PromptStudioFileHelper from utils.models.base_model import BaseModel from utils.models.organization_mixin import ( @@ -155,8 +156,9 @@ def delete(self, organization_id=None, *args, **kwargs): logger.error(f"Error: {file_path} : {e.strerror}") # Continue with the deletion of the tool else: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) file_path = 
PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( organization_id, diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index 94e9f340a..f71caf8c7 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -56,11 +56,12 @@ from unstract.sdk.constants import LogLevel from unstract.sdk.exceptions import IndexingError, SdkError from unstract.sdk.file_storage import FileStorage, FileStorageProvider +from unstract.sdk.file_storage.constants import StorageType +from unstract.sdk.file_storage.env_helper import EnvHelper from unstract.sdk.index import Index from unstract.sdk.prompt import PromptTool from unstract.sdk.utils.tool_utils import ToolUtils -from utils.file_storage.constants import FileStorageType -from utils.file_storage.helpers.common_file_helper import FileStorageHelper +from utils.file_storage.constants import FileStorageKeys from utils.file_storage.helpers.prompt_studio_file_helper import PromptStudioFileHelper from utils.local_context import StateStore @@ -782,8 +783,9 @@ def _fetch_response( process_text=process_text, ) else: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) index_result = PromptStudioHelper.dynamic_indexer( profile_manager=profile_manager, diff --git a/backend/utils/file_storage/constants.py b/backend/utils/file_storage/constants.py index 8ba7e9065..22812122a 100644 --- a/backend/utils/file_storage/constants.py +++ b/backend/utils/file_storage/constants.py @@ -4,6 +4,8 @@ class FileStorageKeys: FILE_STORAGE_PROVIDER = "FILE_STORAGE_PROVIDER" FILE_STORAGE_CREDENTIALS = "FILE_STORAGE_CREDENTIALS" + PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" + 
TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE" class FileStorageType(Enum): diff --git a/backend/utils/file_storage/helpers/common_file_helper.py b/backend/utils/file_storage/helpers/common_file_helper.py deleted file mode 100644 index 2aa1fabbd..000000000 --- a/backend/utils/file_storage/helpers/common_file_helper.py +++ /dev/null @@ -1,47 +0,0 @@ -import json -from typing import Any, Union - -from unstract.sdk.file_storage import ( - FileStorageProvider, - PermanentFileStorage, - SharedTemporaryFileStorage, -) -from utils.file_storage.constants import FileStorageKeys, FileStorageType - -from unstract.core.utilities import UnstractUtils - - -class FileStorageHelper: - # TODO : Optimize this to a singleton class - @staticmethod - def initialize_file_storage( - type: FileStorageType, - ) -> Union[PermanentFileStorage, SharedTemporaryFileStorage]: - provider_data = FileStorageHelper.load_file_storage_envs() - provider = provider_data[FileStorageKeys.FILE_STORAGE_PROVIDER] - provider_value = FileStorageProvider(provider) - credentials = provider_data[FileStorageKeys.FILE_STORAGE_CREDENTIALS] - if type.value == FileStorageType.PERMANENT.value: - file_storage = PermanentFileStorage(provider=provider_value, **credentials) - elif type.value == FileStorageType.TEMPORARY.value: - file_storage = SharedTemporaryFileStorage( - provider=provider_value, **credentials - ) - else: - file_storage = PermanentFileStorage( - provider=FileStorageProvider.LOCAL, **credentials - ) - return file_storage - - @staticmethod - def load_file_storage_envs() -> dict[str, Any]: - cred_env_data: str = UnstractUtils.get_env( - env_key=FileStorageKeys.FILE_STORAGE_CREDENTIALS - ) - credentials = json.loads(cred_env_data) - provider_data: dict[str, Any] = {} - provider_data[FileStorageKeys.FILE_STORAGE_PROVIDER] = credentials["provider"] - provider_data[FileStorageKeys.FILE_STORAGE_CREDENTIALS] = credentials[ - "credentials" - ] - return provider_data diff --git 
a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py index 94182db2a..8b6811069 100644 --- a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py +++ b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py @@ -6,8 +6,9 @@ from file_management.file_management_helper import FileManagerHelper from unstract.sdk.file_storage import FileStorage -from utils.file_storage.constants import FileStorageConstants, FileStorageType -from utils.file_storage.helpers.common_file_helper import FileStorageHelper +from unstract.sdk.file_storage.constants import StorageType +from unstract.sdk.file_storage.env_helper import EnvHelper +from utils.file_storage.constants import FileStorageConstants, FileStorageKeys from unstract.core.utilities import UnstractUtils @@ -37,8 +38,9 @@ def get_or_create_prompt_studio_subdirectory( extract_file_path = str(Path(file_path) / "extract") summarize_file_path = str(Path(file_path) / "summarize") if is_create: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) fs_instance.mkdir(file_path, create_parents=True) fs_instance.mkdir(extract_file_path, create_parents=True) @@ -57,8 +59,9 @@ def upload_for_ide( tool_id (str): ID of the prompt studio tool uploaded_file : File to upload to remote """ - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) file_system_path = ( PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( @@ -77,8 +80,9 @@ def fetch_file_contents( ) -> Union[bytes, str]: """Method to fetch file contents from the remote location. 
The path is constructed in runtime based on the args""" - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) # Fetching legacy file path for lazy copy # This has to be removed once the usage of FS APIs @@ -139,8 +143,9 @@ def delete_for_ide(org_id: str, user_id: str, tool_id: str, file_name: str) -> b """Method to delete file in remote while the corresponsing prompt studio project is deleted or the file is removed from the file manager. This method handles deleted for related files as well.""" - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) file_system_path = ( PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( From 61ce9d83b286bb671bb27ff56f89e9bbf36214e7 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 8 Jan 2025 21:46:17 +0530 Subject: [PATCH 2/3] Sample envs for remote storage --- backend/sample.env | 4 ++++ prompt-service/sample.env | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/backend/sample.env b/backend/sample.env index 8a809c195..c16a07188 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -169,3 +169,7 @@ API_STORAGE_FS_CREDENTIAL='{"endpoint_url": "", "key": "", "secret": ""}' # Optional: Legacy storage path (if applicable) LEGACY_STORAGE_PATH="/path/to/legacy/storage" + +#Remote storage related envs +PERMANENT_REMOTE_STORAGE={"provider":"gcs","credentials":} +REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" \ No newline at end of file diff --git a/prompt-service/sample.env b/prompt-service/sample.env index 9289af409..59ce61a5f 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -29,3 +29,8 @@ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python # 
Flipt Service FLIPT_SERVICE_AVAILABLE=False + + +#Remote storage related envs +PERMANENT_REMOTE_STORAGE={"provider":"gcs","credentials":} +REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" \ No newline at end of file From f420143cc9741b9ea58029163390135f79378e5d Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 8 Jan 2025 21:48:21 +0530 Subject: [PATCH 3/3] Sample envs for remote storage --- backend/sample.env | 2 +- backend/utils/file_storage/constants.py | 2 -- prompt-service/sample.env | 2 +- prompt-service/src/unstract/prompt_service/constants.py | 2 -- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/backend/sample.env b/backend/sample.env index c16a07188..946f4534d 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -172,4 +172,4 @@ LEGACY_STORAGE_PATH="/path/to/legacy/storage" #Remote storage related envs PERMANENT_REMOTE_STORAGE={"provider":"gcs","credentials":} -REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" \ No newline at end of file +REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" diff --git a/backend/utils/file_storage/constants.py b/backend/utils/file_storage/constants.py index 22812122a..8d9c118f5 100644 --- a/backend/utils/file_storage/constants.py +++ b/backend/utils/file_storage/constants.py @@ -2,8 +2,6 @@ class FileStorageKeys: - FILE_STORAGE_PROVIDER = "FILE_STORAGE_PROVIDER" - FILE_STORAGE_CREDENTIALS = "FILE_STORAGE_CREDENTIALS" PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE" diff --git a/prompt-service/sample.env b/prompt-service/sample.env index 59ce61a5f..4a4476533 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -33,4 +33,4 @@ FLIPT_SERVICE_AVAILABLE=False #Remote storage related envs PERMANENT_REMOTE_STORAGE={"provider":"gcs","credentials":} -REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" \ No newline at end of file +REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" diff --git 
a/prompt-service/src/unstract/prompt_service/constants.py b/prompt-service/src/unstract/prompt_service/constants.py index 8bdaa280b..9d567b2de 100644 --- a/prompt-service/src/unstract/prompt_service/constants.py +++ b/prompt-service/src/unstract/prompt_service/constants.py @@ -107,8 +107,6 @@ class DBTableV2: class FileStorageKeys: - FILE_STORAGE_PROVIDER = "FILE_STORAGE_PROVIDER" - FILE_STORAGE_CREDENTIALS = "FILE_STORAGE_CREDENTIALS" PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE"