Skip to content

Commit 900a8ef

Browse files
committed
fix defaults for table extraction
1 parent a427a9f commit 900a8ef

File tree

1 file changed

+1
-7
lines changed

1 file changed

+1
-7
lines changed

prepline_general/api/general.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ def pipeline_api(
295295
hi_res_model_name: Optional[str] = None,
296296
include_page_breaks: bool = False,
297297
ocr_languages: Optional[List[str]] = None,
298-
pdf_infer_table_structure: bool = True,
298+
pdf_infer_table_structure: bool = False,
299299
skip_infer_table_types: Optional[List[str]] = None,
300300
strategy: str = "auto",
301301
xml_keep_tags: bool = False,
@@ -361,7 +361,6 @@ def pipeline_api(
361361

362362
hi_res_model_name = _validate_hi_res_model_name(hi_res_model_name, coordinates)
363363
strategy = _validate_strategy(strategy)
364-
pdf_infer_table_structure = _set_pdf_infer_table_structure(pdf_infer_table_structure, strategy)
365364

366365
# Parallel mode is set by env variable
367366
enable_parallel_mode = os.environ.get("UNSTRUCTURED_PARALLEL_MODE_ENABLED", "false")
@@ -595,11 +594,6 @@ def _validate_chunking_strategy(chunking_strategy: Optional[str]) -> Optional[st
595594
return chunking_strategy
596595

597596

598-
def _set_pdf_infer_table_structure(pdf_infer_table_structure: bool, strategy: str) -> bool:
599-
"""Avoids table inference in "fast" and "ocr_only" runs."""
600-
return strategy in ("hi_res", "auto") and pdf_infer_table_structure
601-
602-
603597
def get_validated_mimetype(file: UploadFile) -> Optional[str]:
604598
"""The MIME-type of `file`.
605599

0 commit comments

Comments
 (0)