-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: streamline kwarg handling #264
Changes from 4 commits
b1c8ec0
ebb86c4
90072b7
39efb7a
f139757
ffea61f
237080b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "0.7.10-dev1" # pragma: no cover | ||
__version__ = "0.7.10-dev2" # pragma: no cover |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,10 +71,7 @@ def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout: | |
def from_file( | ||
cls, | ||
filename: str, | ||
detection_model: Optional[UnstructuredObjectDetectionModel] = None, | ||
element_extraction_model: Optional[UnstructuredElementExtractionModel] = None, | ||
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None, | ||
extract_tables: bool = False, | ||
pdf_image_dpi: int = 200, | ||
**kwargs, | ||
) -> DocumentLayout: | ||
|
@@ -108,11 +105,8 @@ def from_file( | |
image, | ||
number=i + 1, | ||
document_filename=filename, | ||
detection_model=detection_model, | ||
element_extraction_model=element_extraction_model, | ||
layout=layout, | ||
fixed_layout=fixed_layout, | ||
extract_tables=extract_tables, | ||
**kwargs, | ||
) | ||
pages.append(page) | ||
|
@@ -453,10 +447,6 @@ def from_image( | |
def process_data_with_model( | ||
data: BinaryIO, | ||
model_name: Optional[str], | ||
is_image: bool = False, | ||
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None, | ||
extract_tables: bool = False, | ||
pdf_image_dpi: int = 200, | ||
**kwargs, | ||
) -> DocumentLayout: | ||
"""Processes pdf file in the form of a file handler (supporting a read method) into a | ||
|
@@ -467,10 +457,6 @@ def process_data_with_model( | |
layout = process_file_with_model( | ||
tmp_file.name, | ||
model_name, | ||
is_image=is_image, | ||
fixed_layouts=fixed_layouts, | ||
extract_tables=extract_tables, | ||
pdf_image_dpi=pdf_image_dpi, | ||
**kwargs, | ||
) | ||
|
||
|
@@ -484,6 +470,8 @@ def process_file_with_model( | |
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None, | ||
extract_tables: bool = False, | ||
pdf_image_dpi: int = 200, | ||
extract_images_in_pdf: bool = False, | ||
image_output_dir_path: Optional[str] = None, | ||
Comment on lines
+473
to
+474
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the guideline here? When do we set a parameter explicitly, and when do we implicitly trust that a kwarg does something further down the call stack? And why are these two explicit here? These two parameters don't seem to be modified in this function; aren't they just being passed through? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are explicit since we don't want them passed down to |
||
**kwargs, | ||
) -> DocumentLayout: | ||
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by | ||
|
@@ -514,6 +502,8 @@ def process_file_with_model( | |
fixed_layouts=fixed_layouts, | ||
extract_tables=extract_tables, | ||
pdf_image_dpi=pdf_image_dpi, | ||
extract_images_in_pdf=extract_images_in_pdf, | ||
image_output_dir_path=image_output_dir_path, | ||
**kwargs, | ||
) | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please make this a release version.