diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
index f33cada092..cfc5415278 100644
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@@ -584,6 +584,19 @@ def test_auto_handles_kwarg_with_infer_table_structure(infer_bool):
         assert mock_process_file_with_model.call_args[1]["infer_table_structure"] is infer_bool
 
 
+def test_auto_handles_kwarg_with_infer_table_structure_when_none():
+    with patch(
+        "unstructured.partition.pdf_image.ocr.process_file_with_ocr",
+    ) as mock_process_file_with_model:
+        partition(
+            example_doc_path("pdf/layout-parser-paper-fast.pdf"),
+            pdf_infer_table_structure=True,
+            strategy=PartitionStrategy.HI_RES,
+            infer_table_structure=None,
+        )
+        assert mock_process_file_with_model.call_args[1]["infer_table_structure"] is True
+
+
 def test_auto_partition_pdf_uses_pdf_infer_table_structure_argument():
     with patch(
         "unstructured.partition.pdf_image.ocr.process_file_with_ocr",
diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py
index 82be86483c..7616d763cf 100644
--- a/unstructured/partition/auto.py
+++ b/unstructured/partition/auto.py
@@ -178,13 +178,15 @@ def partition(
     # into a partition function, e.g., partition_email is reused to partition sub-elements, e.g.,
     # partition an image attachment buy calling partition with the kwargs. In that case here kwargs
     # would have a infer_table_structure already
-    infer_table_structure = kwargs.pop(
-        "infer_table_structure",
-        decide_table_extraction(
+    kwargs_infer_table_structure = kwargs.pop("infer_table_structure", None)
+    infer_table_structure = (
+        kwargs_infer_table_structure
+        if kwargs_infer_table_structure is not None
+        else decide_table_extraction(
             file_type,
             skip_infer_table_types,
             pdf_infer_table_structure,
-        ),
+        )
     )
 
     partitioner_loader = _PartitionerLoader()