From b1bef0760df7ec62922a12db7542d87b5b7430ad Mon Sep 17 00:00:00 2001 From: harini-venkataraman <115449948+harini-venkataraman@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:51:18 +0530 Subject: [PATCH] [FIX] Env standardization for Backend. (#1050) * env standardization for backend * Sample envs for remote storage * Sample envs for remote storage --------- Signed-off-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Co-authored-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Co-authored-by: gayathrivijayakumar --- .../prompt_studio_core_v2/models.py | 10 ++-- .../prompt_studio_helper.py | 20 ++++---- backend/sample.env | 8 ++-- backend/utils/file_storage/constants.py | 4 +- .../helpers/common_file_helper.py | 47 ------------------- .../helpers/prompt_studio_file_helper.py | 25 ++++++---- prompt-service/sample.env | 5 ++ .../src/unstract/prompt_service/constants.py | 2 - 8 files changed, 44 insertions(+), 77 deletions(-) delete mode 100644 backend/utils/file_storage/helpers/common_file_helper.py diff --git a/backend/prompt_studio/prompt_studio_core_v2/models.py b/backend/prompt_studio/prompt_studio_core_v2/models.py index 81c8b535c..c4a71cf15 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/models.py +++ b/backend/prompt_studio/prompt_studio_core_v2/models.py @@ -9,8 +9,9 @@ from django.db.models import QuerySet from file_management.file_management_helper import FileManagerHelper from prompt_studio.prompt_studio_core_v2.constants import DefaultPrompts -from utils.file_storage.constants import FileStorageType -from utils.file_storage.helpers.common_file_helper import FileStorageHelper +from unstract.sdk.file_storage.constants import StorageType +from unstract.sdk.file_storage.env_helper import EnvHelper +from utils.file_storage.constants import FileStorageKeys from utils.file_storage.helpers.prompt_studio_file_helper import PromptStudioFileHelper from utils.models.base_model import BaseModel from 
utils.models.organization_mixin import ( @@ -155,8 +156,9 @@ def delete(self, organization_id=None, *args, **kwargs): logger.error(f"Error: {file_path} : {e.strerror}") # Continue with the deletion of the tool else: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) file_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( organization_id, diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index 5fd7fb2e8..f5ccf5419 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -56,11 +56,12 @@ from unstract.sdk.constants import LogLevel from unstract.sdk.exceptions import IndexingError, SdkError from unstract.sdk.file_storage import FileStorage, FileStorageProvider +from unstract.sdk.file_storage.constants import StorageType +from unstract.sdk.file_storage.env_helper import EnvHelper from unstract.sdk.index import Index from unstract.sdk.prompt import PromptTool from unstract.sdk.utils.tool_utils import ToolUtils -from utils.file_storage.constants import FileStorageType -from utils.file_storage.helpers.common_file_helper import FileStorageHelper +from utils.file_storage.constants import FileStorageKeys from utils.file_storage.helpers.prompt_studio_file_helper import PromptStudioFileHelper from utils.local_context import StateStore @@ -395,8 +396,9 @@ def index_document( process_text=process_text, ) else: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) doc_id = PromptStudioHelper.dynamic_indexer( 
profile_manager=default_profile, @@ -800,8 +802,9 @@ def _fetch_response( process_text=process_text, ) else: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) index_result = PromptStudioHelper.dynamic_indexer( profile_manager=profile_manager, @@ -1154,8 +1157,9 @@ def _fetch_single_pass_response( process_text=process_text, ) else: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) index_result = PromptStudioHelper.dynamic_indexer( profile_manager=default_profile, diff --git a/backend/sample.env b/backend/sample.env index c868bc0ee..9223fee13 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -148,10 +148,6 @@ TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config" # Flipt Service FLIPT_SERVICE_AVAILABLE=False -# Remote storage config for tool registry -TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}' - - # File System Configuration for Workflow and API Execution # Directory Prefixes for storing execution files @@ -165,5 +161,9 @@ WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider":"minio","credentials": # Storage Provider for API Execution API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "XXX", "secret": "XXX"}}' +# Remote storage related envs +PERMANENT_REMOTE_STORAGE='{"provider":"gcs","credentials":{}}' +REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" + # Storage Provider for Tool registry TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}' diff --git a/backend/utils/file_storage/constants.py b/backend/utils/file_storage/constants.py index 8ba7e9065..8d9c118f5 100644 --- a/backend/utils/file_storage/constants.py +++ 
b/backend/utils/file_storage/constants.py @@ -2,8 +2,8 @@ class FileStorageKeys: - FILE_STORAGE_PROVIDER = "FILE_STORAGE_PROVIDER" - FILE_STORAGE_CREDENTIALS = "FILE_STORAGE_CREDENTIALS" + PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" + TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE" class FileStorageType(Enum): diff --git a/backend/utils/file_storage/helpers/common_file_helper.py b/backend/utils/file_storage/helpers/common_file_helper.py deleted file mode 100644 index 2aa1fabbd..000000000 --- a/backend/utils/file_storage/helpers/common_file_helper.py +++ /dev/null @@ -1,47 +0,0 @@ -import json -from typing import Any, Union - -from unstract.sdk.file_storage import ( - FileStorageProvider, - PermanentFileStorage, - SharedTemporaryFileStorage, -) -from utils.file_storage.constants import FileStorageKeys, FileStorageType - -from unstract.core.utilities import UnstractUtils - - -class FileStorageHelper: - # TODO : Optimize this to a singleton class - @staticmethod - def initialize_file_storage( - type: FileStorageType, - ) -> Union[PermanentFileStorage, SharedTemporaryFileStorage]: - provider_data = FileStorageHelper.load_file_storage_envs() - provider = provider_data[FileStorageKeys.FILE_STORAGE_PROVIDER] - provider_value = FileStorageProvider(provider) - credentials = provider_data[FileStorageKeys.FILE_STORAGE_CREDENTIALS] - if type.value == FileStorageType.PERMANENT.value: - file_storage = PermanentFileStorage(provider=provider_value, **credentials) - elif type.value == FileStorageType.TEMPORARY.value: - file_storage = SharedTemporaryFileStorage( - provider=provider_value, **credentials - ) - else: - file_storage = PermanentFileStorage( - provider=FileStorageProvider.LOCAL, **credentials - ) - return file_storage - - @staticmethod - def load_file_storage_envs() -> dict[str, Any]: - cred_env_data: str = UnstractUtils.get_env( - env_key=FileStorageKeys.FILE_STORAGE_CREDENTIALS - ) - credentials = json.loads(cred_env_data) - provider_data: dict[str, Any] 
= {} - provider_data[FileStorageKeys.FILE_STORAGE_PROVIDER] = credentials["provider"] - provider_data[FileStorageKeys.FILE_STORAGE_CREDENTIALS] = credentials[ - "credentials" - ] - return provider_data diff --git a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py index 94182db2a..8b6811069 100644 --- a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py +++ b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py @@ -6,8 +6,9 @@ from file_management.file_management_helper import FileManagerHelper from unstract.sdk.file_storage import FileStorage -from utils.file_storage.constants import FileStorageConstants, FileStorageType -from utils.file_storage.helpers.common_file_helper import FileStorageHelper +from unstract.sdk.file_storage.constants import StorageType +from unstract.sdk.file_storage.env_helper import EnvHelper +from utils.file_storage.constants import FileStorageConstants, FileStorageKeys from unstract.core.utilities import UnstractUtils @@ -37,8 +38,9 @@ def get_or_create_prompt_studio_subdirectory( extract_file_path = str(Path(file_path) / "extract") summarize_file_path = str(Path(file_path) / "summarize") if is_create: - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) fs_instance.mkdir(file_path, create_parents=True) fs_instance.mkdir(extract_file_path, create_parents=True) @@ -57,8 +59,9 @@ def upload_for_ide( tool_id (str): ID of the prompt studio tool uploaded_file : File to upload to remote """ - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) file_system_path = ( 
PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( @@ -77,8 +80,9 @@ def fetch_file_contents( ) -> Union[bytes, str]: """Method to fetch file contents from the remote location. The path is constructed in runtime based on the args""" - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) # Fetching legacy file path for lazy copy # This has to be removed once the usage of FS APIs @@ -139,8 +143,9 @@ def delete_for_ide(org_id: str, user_id: str, tool_id: str, file_name: str) -> b """Method to delete file in remote while the corresponsing prompt studio project is deleted or the file is removed from the file manager. This method handles deleted for related files as well.""" - fs_instance = FileStorageHelper.initialize_file_storage( - type=FileStorageType.PERMANENT + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) file_system_path = ( PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( diff --git a/prompt-service/sample.env b/prompt-service/sample.env index 9289af409..4a4476533 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -29,3 +29,8 @@ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python # Flipt Service FLIPT_SERVICE_AVAILABLE=False + + +# Remote storage related envs +PERMANENT_REMOTE_STORAGE='{"provider":"gcs","credentials":{}}' +REMOTE_PROMPT_STUDIO_FILE_PATH="/prompt_studio_data/" diff --git a/prompt-service/src/unstract/prompt_service/constants.py b/prompt-service/src/unstract/prompt_service/constants.py index 8bdaa280b..9d567b2de 100644 --- a/prompt-service/src/unstract/prompt_service/constants.py +++ b/prompt-service/src/unstract/prompt_service/constants.py @@ -107,8 +107,6 @@ class DBTableV2: class FileStorageKeys: - FILE_STORAGE_PROVIDER = "FILE_STORAGE_PROVIDER" - 
FILE_STORAGE_CREDENTIALS = "FILE_STORAGE_CREDENTIALS" PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE"