Skip to content

Commit

Permalink
refactor: 💡 separate app logic from terminal logic (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
soul-codes authored Dec 21, 2024
1 parent a19923a commit e3518ed
Show file tree
Hide file tree
Showing 24 changed files with 701 additions and 376 deletions.
7 changes: 7 additions & 0 deletions app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .analysis_context import AnalysisContext
from .analysis_output_context import AnalysisOutputContext
from .analysis_webserver_context import AnalysisWebServerContext
from .app import App
from .app_context import AppContext
from .project_context import ProjectContext
from .settings_context import SettingsContext
163 changes: 163 additions & 0 deletions app/analysis_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from functools import cached_property
from tempfile import TemporaryDirectory
from typing import Literal

from pydantic import BaseModel

from analyzer_interface import AnalyzerDeclaration, SecondaryAnalyzerDeclaration
from context import (
InputColumnProvider,
PrimaryAnalyzerContext,
SecondaryAnalyzerContext,
)
from storage import AnalysisModel

from .app_context import AppContext
from .project_context import ProjectContext


class AnalysisRunProgressEvent(BaseModel):
    """Progress notification describing one step of an analysis run.

    Each notification names an analyzer and says whether that analyzer is
    starting or has finished.
    """

    # The primary or secondary analyzer this notification refers to.
    analyzer: AnalyzerDeclaration | SecondaryAnalyzerDeclaration

    # "start" or "finish" for the analyzer named above.
    event: Literal["start", "finish"]


class AnalysisContext(BaseModel):
    """Application-level handle on a single stored analysis.

    Bundles the persisted ``AnalysisModel`` with the app and project contexts
    it belongs to, and exposes the operations the application performs on an
    analysis: renaming, deleting, running the analyzers, serving the web
    presenters and enumerating exportable outputs.
    """

    app_context: AppContext
    project_context: ProjectContext
    model: AnalysisModel
    # Set by `delete()`; `run()` refuses to execute once this is true.
    is_deleted: bool = False

    @property
    def display_name(self):
        """Display name stored on the analysis model."""
        return self.model.display_name

    @property
    def id(self):
        """Identifier stored on the analysis model."""
        return self.model.id

    @property
    def analyzer_id(self):
        """Id of the primary analyzer this analysis was created with."""
        return self.model.primary_analyzer_id

    @property
    def analyzer_spec(self):
        """Primary analyzer declaration looked up in the suite.

        Raises:
            AssertionError: if the suite has no primary analyzer with
                `analyzer_id`.
        """
        analyzer = self.app_context.suite.get_primary_analyzer(self.analyzer_id)
        if not analyzer:
            # Raised explicitly (rather than via `assert`) so the check is not
            # stripped when Python runs with -O; the exception type is kept
            # for callers that already handle AssertionError.
            raise AssertionError(f"Analyzer `{self.analyzer_id}` not found")
        return analyzer

    @property
    def column_mapping(self):
        """Column mapping stored on the analysis model.

        `run()` iterates it as analyzer column name -> user column name.
        """
        return self.model.column_mapping

    @property
    def create_time(self):
        """Result of calling `create_time()` on the analysis model."""
        return self.model.create_time()

    @property
    def is_draft(self):
        """Draft flag stored on the model; `run()` clears it on completion."""
        return self.model.is_draft

    @cached_property
    def web_presenters(self):
        """Web presenters the suite finds for this analysis' primary analyzer."""
        return self.app_context.suite.find_web_presenters(self.analyzer_spec)

    def web_server(self):
        """Create a web server context bound to this analysis."""
        # Imported here rather than at module level; presumably to avoid a
        # circular import, since that module imports this one.
        from .analysis_webserver_context import AnalysisWebServerContext

        return AnalysisWebServerContext(
            app_context=self.app_context, analysis_context=self
        )

    def rename(self, new_name: str):
        """Change the display name and persist the model."""
        self.model.display_name = new_name
        self.app_context.storage.save_analysis(self.model)

    def delete(self):
        """Mark this context as deleted and delete the analysis from storage."""
        self.is_deleted = True
        self.app_context.storage.delete_analysis(self.model)

    def run(self):
        """Run the primary analyzer, then its secondary analyzers, in order.

        This is a generator: nothing executes until it is iterated. It yields
        an `AnalysisRunProgressEvent` with event "start" before and "finish"
        after each analyzer. Once every analyzer has finished, the model's
        draft flag is cleared and the model is saved.

        Raises:
            AssertionError: on first iteration, if the analysis was deleted
                through this context.
        """
        if self.is_deleted:
            # Explicit raise instead of `assert` so the guard survives
            # `python -O`; same exception type and message as before.
            raise AssertionError("Analysis is deleted")

        secondary_analyzers = (
            self.app_context.suite.find_toposorted_secondary_analyzers(
                self.analyzer_spec
            )
        )

        # Each analyzer gets its own temporary directory, removed when the
        # analyzer is done (or when the generator is closed early).
        with TemporaryDirectory() as temp_dir:
            yield AnalysisRunProgressEvent(analyzer=self.analyzer_spec, event="start")
            user_columns_by_name = {
                user_column.name: user_column
                for user_column in self.project_context.columns
            }
            analyzer_context = PrimaryAnalyzerContext(
                analysis=self.model,
                analyzer=self.analyzer_spec,
                store=self.app_context.storage,
                temp_dir=temp_dir,
                input_columns={
                    analyzer_column_name: InputColumnProvider(
                        user_column_name=user_column_name,
                        semantic=user_columns_by_name[user_column_name].semantic,
                    )
                    for analyzer_column_name, user_column_name in self.column_mapping.items()
                },
            )
            analyzer_context.prepare()
            self.analyzer_spec.entry_point(analyzer_context)
            yield AnalysisRunProgressEvent(analyzer=self.analyzer_spec, event="finish")

        for secondary in secondary_analyzers:
            yield AnalysisRunProgressEvent(analyzer=secondary, event="start")
            with TemporaryDirectory() as temp_dir:
                analyzer_context = SecondaryAnalyzerContext(
                    analysis=self.model,
                    secondary_analyzer=secondary,
                    temp_dir=temp_dir,
                    store=self.app_context.storage,
                )
                analyzer_context.prepare()
                secondary.entry_point(analyzer_context)
            yield AnalysisRunProgressEvent(analyzer=secondary, event="finish")

        self.model.is_draft = False
        self.app_context.storage.save_analysis(self.model)

    @property
    def export_root_path(self):
        """Exports root path that storage reports for this analysis."""
        # NOTE(review): this reaches into an underscore-prefixed storage
        # method; consider exposing a public accessor on the storage class.
        return self.app_context.storage._get_project_exports_root_path(self.model)

    def get_all_exportable_outputs(self):
        """List output contexts for every non-internal output.

        The primary analyzer's outputs come first (with `secondary_spec=None`),
        followed by the outputs of each secondary analysis that storage lists
        for this analysis and that the suite can still resolve.
        """
        from .analysis_output_context import AnalysisOutputContext

        return [
            *(
                AnalysisOutputContext(
                    app_context=self.app_context,
                    analysis_context=self,
                    secondary_spec=None,
                    output_spec=output,
                )
                for output in self.analyzer_spec.outputs
                if not output.internal
            ),
            *(
                AnalysisOutputContext(
                    app_context=self.app_context,
                    analysis_context=self,
                    secondary_spec=secondary,
                    output_spec=output,
                )
                for secondary_id in self.app_context.storage.list_secondary_analyses(
                    self.model
                )
                # Skip stored secondary analyses the suite no longer knows.
                if (
                    secondary := self.app_context.suite.get_secondary_analyzer_by_id(
                        self.analyzer_id, secondary_id
                    )
                )
                is not None
                for output in secondary.outputs
                if not output.internal
            ),
        ]
70 changes: 70 additions & 0 deletions app/analysis_output_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from functools import cached_property
from typing import Literal, Optional

from pydantic import BaseModel

from analyzer_interface import AnalyzerOutput, SecondaryAnalyzerInterface
from storage import SupportedOutputExtension

from .analysis_context import AnalysisContext
from .app_context import AppContext
from .utils import parquet_row_count


class AnalysisOutputContext(BaseModel):
    """Handle on one output of an analysis, from either analyzer kind.

    The output belongs to the primary analyzer when `secondary_spec` is
    ``None`` and to that secondary analyzer otherwise.
    """

    app_context: AppContext
    analysis_context: AnalysisContext
    # None means the output comes from the primary analyzer.
    secondary_spec: Optional[SecondaryAnalyzerInterface]
    output_spec: AnalyzerOutput

    @property
    def descriptive_qualified_name(self):
        """Output name followed, in parentheses, by where it comes from.

        The parenthesised part is the secondary analyzer's name, or "Base"
        when there is no secondary analyzer.
        """
        origin_label = self.secondary_spec.name if self.secondary_spec else "Base"
        return f"{self.output_spec.name} ({origin_label})"

    def export(
        self,
        *,
        format: SupportedOutputExtension,
        chunk_size_override: Optional[int | Literal[False]] = None,
    ):
        """Export this output through storage and return storage's result.

        Args:
            format: Output extension handed to storage as `extension`.
            chunk_size_override: When ``None``, the chunk size from the app
                settings is used. Any other value replaces the setting; a
                falsy value (``False`` or ``0``) is passed on as ``None``.
        """
        if chunk_size_override is None:
            requested_chunk_size = self.app_context.settings.export_chunk_size
        else:
            requested_chunk_size = chunk_size_override
        # Collapse every falsy request into None before handing it to storage.
        # NOTE(review): presumably None means "do not chunk" - confirm in storage.
        export_chunk_size = requested_chunk_size or None

        storage = self.app_context.storage
        analysis_model = self.analysis_context.model

        if self.secondary_spec is not None:
            return storage.export_project_secondary_output(
                analysis_model,
                self.secondary_spec.id,
                self.output_spec.id,
                extension=format,
                spec=self.output_spec,
                export_chunk_size=export_chunk_size,
            )

        return storage.export_project_primary_output(
            analysis_model,
            self.output_spec.id,
            extension=format,
            spec=self.output_spec,
            export_chunk_size=export_chunk_size,
        )

    @cached_property
    def num_rows(
        self,
    ):
        """Row count of this output's parquet file, computed once and cached."""
        storage = self.app_context.storage
        analysis_model = self.analysis_context.model

        if self.secondary_spec is not None:
            parquet_path = storage.get_secondary_output_parquet_path(
                analysis_model,
                self.secondary_spec.id,
                self.output_spec.id,
            )
        else:
            parquet_path = storage.get_primary_output_parquet_path(
                analysis_model, self.output_spec.id
            )
        return parquet_row_count(parquet_path)
78 changes: 78 additions & 0 deletions app/analysis_webserver_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging
import os.path
from pathlib import Path
from tempfile import TemporaryDirectory

from dash import Dash
from flask import Flask, render_template
from pydantic import BaseModel
from waitress import serve

from context import WebPresenterContext

from .analysis_context import AnalysisContext
from .app_context import AppContext


class AnalysisWebServerContext(BaseModel):
    """Serves the web presenters of one analysis from a local web server."""

    app_context: AppContext
    analysis_context: AnalysisContext

    def start(self):
        """Build the Flask/Dash application and serve it until the server stops.

        One Dash app is mounted per web presenter under ``/<presenter.id>/``,
        each with its own temporary directory, and an index page at ``/``
        lists the presenters. The call blocks inside waitress' `serve` on
        127.0.0.1:8050.

        The waitress logger is silenced while serving and restored afterwards.
        All temporary directories are removed on exit, including when setting
        up a presenter fails before the server ever starts.
        """
        containing_dir = str(Path(__file__).resolve().parent)
        static_folder = os.path.join(containing_dir, "web_static")
        template_folder = os.path.join(containing_dir, "web_templates")

        web_presenters = self.analysis_context.web_presenters
        web_server = Flask(
            __name__,
            template_folder=template_folder,
            static_folder=static_folder,
            static_url_path="/static",
        )
        web_server.logger.disabled = True
        temp_dirs: list[TemporaryDirectory] = []

        try:
            for presenter in web_presenters:
                dash_app = Dash(
                    presenter.server_name,
                    server=web_server,
                    url_base_pathname=f"/{presenter.id}/",
                    external_stylesheets=["/static/dashboard_base.css"],
                )
                temp_dir = TemporaryDirectory()
                # Register the directory before anything else can raise, so
                # the outer `finally` always sees it and cleans it up.
                temp_dirs.append(temp_dir)
                presenter_context = WebPresenterContext(
                    analysis=self.analysis_context.model,
                    web_presenter=presenter,
                    store=self.app_context.storage,
                    temp_dir=temp_dir.name,
                    dash_app=dash_app,
                )
                presenter.factory(presenter_context)

            project_name = self.analysis_context.project_context.display_name
            analyzer_name = self.analysis_context.display_name

            @web_server.route("/")
            def index():
                return render_template(
                    "index.html",
                    panels=[
                        (presenter.id, presenter.name) for presenter in web_presenters
                    ],
                    project_name=project_name,
                    analyzer_name=analyzer_name,
                )

            # Remember the waitress logger's state so it can be put back once
            # the server exits.
            server_log = logging.getLogger("waitress")
            original_log_level = server_log.level
            original_disabled = server_log.disabled
            server_log.setLevel(logging.ERROR)
            server_log.disabled = True

            try:
                serve(web_server, host="127.0.0.1", port=8050)
            finally:
                server_log.setLevel(original_log_level)
                server_log.disabled = original_disabled
        finally:
            for temp_dir in temp_dirs:
                temp_dir.cleanup()
30 changes: 30 additions & 0 deletions app/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from tempfile import NamedTemporaryFile

from pydantic import BaseModel

from importing import ImporterSession

from .app_context import AppContext
from .project_context import ProjectContext


class App(BaseModel):
    """Entry point of the application logic: lists and creates projects."""

    context: AppContext

    def list_projects(self):
        """Return a `ProjectContext` for every project storage lists."""
        return [
            ProjectContext(model=project, app_context=self.context)
            for project in self.context.storage.list_projects()
        ]

    def create_project(self, name: str, importer_session: ImporterSession):
        """Import data into a new project and return its context.

        The importer session writes its data as parquet to a temporary file,
        whose path is then handed to storage to initialise the project.

        Args:
            name: Display name for the new project.
            importer_session: Session whose `import_as_parquet` writes the
                input data to the given path.

        Raises:
            Whatever the importer or storage raises; the temporary file is
            removed before the exception propagates.
        """
        import os
        from contextlib import suppress

        # Only the unique path is needed. Close the handle straight away so
        # the importer and storage can open or move the file by name on every
        # platform; `delete=False` keeps the file on disk after closing.
        with NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name

        try:
            importer_session.import_as_parquet(temp_path)
            project_model = self.context.storage.init_project(
                display_name=name, input_temp_file=temp_path
            )
        except BaseException:
            # Do not leave the temporary file behind on failure. It may
            # already be gone if storage consumed it before failing.
            with suppress(OSError):
                os.remove(temp_path)
            raise

        # NOTE(review): on success the file is left to storage, as before;
        # presumably `init_project` moves or copies it - confirm.
        return ProjectContext(model=project_model, app_context=self.context)

    @property
    def file_selector_state(self):
        """The `file_selector_state` object exposed by storage."""
        return self.context.storage.file_selector_state
18 changes: 18 additions & 0 deletions app/app_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from functools import cached_property

from pydantic import BaseModel, ConfigDict

from analyzer_interface.suite import AnalyzerSuite
from storage import Storage


class AppContext(BaseModel):
    """Shared application-wide services: storage and the analyzer suite."""

    # Lets pydantic accept field types it has no built-in validation for.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    storage: Storage
    suite: AnalyzerSuite

    @cached_property
    def settings(self):
        """Settings context bound to this app context, built once and cached."""
        # Imported at call time rather than at module level; presumably to
        # avoid a circular import with the settings module - confirm.
        from .settings_context import SettingsContext

        settings_context = SettingsContext(app_context=self)
        return settings_context
Loading

0 comments on commit e3518ed

Please sign in to comment.