Skip to content

Commit

Permalink
chore: streamline kwarg handling (#264)
Browse files Browse the repository at this point in the history
Improves kwarg handling so that kwargs are handled explicitly when needed, and suppressed otherwise.
  • Loading branch information
qued authored Oct 21, 2023
1 parent 326f180 commit 1cf12e9
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 17 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## 0.7.10-dev2
## 0.7.10

* Handle kwargs explicitly when needed, suppress otherwise
* fix: Reduce Chipper memory consumption on x86_64 CPUs
* fix: Skips ordering elements coming from Chipper
* fix: After the refactoring that introduced Chipper, annotate() was unable to show text with extra info from elements; this is now fixed.
Expand Down
4 changes: 3 additions & 1 deletion test_unstructured_inference/inference/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os.path
import tempfile
from functools import partial
from unittest.mock import mock_open, patch
from unittest.mock import ANY, mock_open, patch

import numpy as np
import pytest
Expand Down Expand Up @@ -675,6 +675,8 @@ def test_process_file_with_model_routing(monkeypatch, model_type, is_detection_m
fixed_layouts=None,
extract_tables=False,
pdf_image_dpi=200,
extract_images_in_pdf=ANY,
image_output_dir_path=ANY,
)


Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.10-dev2" # pragma: no cover
__version__ = "0.7.10" # pragma: no cover
18 changes: 4 additions & 14 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,7 @@ def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout:
def from_file(
cls,
filename: str,
detection_model: Optional[UnstructuredObjectDetectionModel] = None,
element_extraction_model: Optional[UnstructuredElementExtractionModel] = None,
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
extract_tables: bool = False,
pdf_image_dpi: int = 200,
**kwargs,
) -> DocumentLayout:
Expand Down Expand Up @@ -108,11 +105,8 @@ def from_file(
image,
number=i + 1,
document_filename=filename,
detection_model=detection_model,
element_extraction_model=element_extraction_model,
layout=layout,
fixed_layout=fixed_layout,
extract_tables=extract_tables,
**kwargs,
)
pages.append(page)
Expand Down Expand Up @@ -453,10 +447,6 @@ def from_image(
def process_data_with_model(
data: BinaryIO,
model_name: Optional[str],
is_image: bool = False,
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
extract_tables: bool = False,
pdf_image_dpi: int = 200,
**kwargs,
) -> DocumentLayout:
"""Processes pdf file in the form of a file handler (supporting a read method) into a
Expand All @@ -467,10 +457,6 @@ def process_data_with_model(
layout = process_file_with_model(
tmp_file.name,
model_name,
is_image=is_image,
fixed_layouts=fixed_layouts,
extract_tables=extract_tables,
pdf_image_dpi=pdf_image_dpi,
**kwargs,
)

Expand All @@ -484,6 +470,8 @@ def process_file_with_model(
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
extract_tables: bool = False,
pdf_image_dpi: int = 200,
extract_images_in_pdf: bool = False,
image_output_dir_path: Optional[str] = None,
**kwargs,
) -> DocumentLayout:
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
Expand Down Expand Up @@ -514,6 +502,8 @@ def process_file_with_model(
fixed_layouts=fixed_layouts,
extract_tables=extract_tables,
pdf_image_dpi=pdf_image_dpi,
extract_images_in_pdf=extract_images_in_pdf,
image_output_dir_path=image_output_dir_path,
**kwargs,
)
)
Expand Down

0 comments on commit 1cf12e9

Please sign in to comment.