From 1cf12e90abda720086bcf03d1aefcbaaeef4fc4f Mon Sep 17 00:00:00 2001
From: qued <64741807+qued@users.noreply.github.com>
Date: Sat, 21 Oct 2023 02:37:55 -0500
Subject: [PATCH] chore: streamline kwarg handling (#264)

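Improves kwarg handling so that kwargs are handled explicitly when needed, and suppressed otherwise.

Details:

* `from_file` no longer declares `detection_model`, `element_extraction_model`, or
  `extract_tables`; these values are now forwarded through `**kwargs`.
* `process_data_with_model` no longer declares `is_image`, `fixed_layouts`,
  `extract_tables`, or `pdf_image_dpi`; they are forwarded to
  `process_file_with_model` through `**kwargs`, so callers must pass them by keyword.
* `process_file_with_model` now declares `extract_images_in_pdf` (default `False`) and
  `image_output_dir_path` (default `None`) explicitly and passes them on by name.
* Bumps the version from `0.7.10-dev2` to `0.7.10`.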
---
 CHANGELOG.md                                   |  3 ++-
 .../inference/test_layout.py                   |  4 +++-
 unstructured_inference/__version__.py          |  2 +-
 unstructured_inference/inference/layout.py     | 18 ++++--------------
 4 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ca246338..fb399e56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
-## 0.7.10-dev2
+## 0.7.10
 
+* Handle kwargs explicitly when needed, suppress otherwise
 * fix: Reduce Chipper memory consumption on x86_64 cpus
 * fix: Skips ordering elements coming from Chipper
 * fix: After refactoring to introduce Chipper, annotate() weren't able to show text with extra info from elements, this is fixed now.
diff --git a/test_unstructured_inference/inference/test_layout.py b/test_unstructured_inference/inference/test_layout.py
index 49f8d6e4..a75b505c 100644
--- a/test_unstructured_inference/inference/test_layout.py
+++ b/test_unstructured_inference/inference/test_layout.py
@@ -2,7 +2,7 @@
 import os.path
 import tempfile
 from functools import partial
-from unittest.mock import mock_open, patch
+from unittest.mock import ANY, mock_open, patch
 
 import numpy as np
 import pytest
@@ -675,6 +675,8 @@ def test_process_file_with_model_routing(monkeypatch, model_type, is_detection_m
             fixed_layouts=None,
             extract_tables=False,
             pdf_image_dpi=200,
+            extract_images_in_pdf=ANY,
+            image_output_dir_path=ANY,
         )
 
 
diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py
index 5a48fbf0..8f560143 100644
--- a/unstructured_inference/__version__.py
+++ b/unstructured_inference/__version__.py
@@ -1 +1 @@
-__version__ = "0.7.10-dev2"  # pragma: no cover
+__version__ = "0.7.10"  # pragma: no cover
diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py
index 5ab39aab..e8303e66 100644
--- a/unstructured_inference/inference/layout.py
+++ b/unstructured_inference/inference/layout.py
@@ -71,10 +71,7 @@ def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout:
     def from_file(
         cls,
         filename: str,
-        detection_model: Optional[UnstructuredObjectDetectionModel] = None,
-        element_extraction_model: Optional[UnstructuredElementExtractionModel] = None,
         fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
-        extract_tables: bool = False,
         pdf_image_dpi: int = 200,
         **kwargs,
     ) -> DocumentLayout:
@@ -108,11 +105,8 @@ def from_file(
                         image,
                         number=i + 1,
                         document_filename=filename,
-                        detection_model=detection_model,
-                        element_extraction_model=element_extraction_model,
                         layout=layout,
                         fixed_layout=fixed_layout,
-                        extract_tables=extract_tables,
                         **kwargs,
                     )
                     pages.append(page)
@@ -453,10 +447,6 @@ def from_image(
 def process_data_with_model(
     data: BinaryIO,
     model_name: Optional[str],
-    is_image: bool = False,
-    fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
-    extract_tables: bool = False,
-    pdf_image_dpi: int = 200,
     **kwargs,
 ) -> DocumentLayout:
     """Processes pdf file in the form of a file handler (supporting a read method) into a
@@ -467,10 +457,6 @@ def process_data_with_model(
         layout = process_file_with_model(
             tmp_file.name,
             model_name,
-            is_image=is_image,
-            fixed_layouts=fixed_layouts,
-            extract_tables=extract_tables,
-            pdf_image_dpi=pdf_image_dpi,
             **kwargs,
         )
 
@@ -484,6 +470,8 @@ def process_file_with_model(
     fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
     extract_tables: bool = False,
     pdf_image_dpi: int = 200,
+    extract_images_in_pdf: bool = False,
+    image_output_dir_path: Optional[str] = None,
     **kwargs,
 ) -> DocumentLayout:
     """Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -514,6 +502,8 @@ def process_file_with_model(
             fixed_layouts=fixed_layouts,
             extract_tables=extract_tables,
             pdf_image_dpi=pdf_image_dpi,
+            extract_images_in_pdf=extract_images_in_pdf,
+            image_output_dir_path=image_output_dir_path,
             **kwargs,
         )
     )