@@ -361,7 +361,11 @@ def pipeline_api(
361
361
362
362
hi_res_model_name = _validate_hi_res_model_name (hi_res_model_name , coordinates )
363
363
strategy = _validate_strategy (strategy )
364
- pdf_infer_table_structure = _set_pdf_infer_table_structure (pdf_infer_table_structure , strategy )
364
+ pdf_infer_table_structure = _set_pdf_infer_table_structure (
365
+ pdf_infer_table_structure ,
366
+ strategy ,
367
+ skip_infer_table_types ,
368
+ )
365
369
366
370
# Parallel mode is set by env variable
367
371
enable_parallel_mode = os .environ .get ("UNSTRUCTURED_PARALLEL_MODE_ENABLED" , "false" )
@@ -441,9 +445,9 @@ def pipeline_api(
441
445
)
442
446
elif hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES :
443
447
with ChipperMemoryProtection ():
444
- elements = partition (** partition_kwargs ) # pyright: ignore[reportGeneralTypeIssues]
448
+ elements = partition (** partition_kwargs ) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
445
449
else :
446
- elements = partition (** partition_kwargs ) # pyright: ignore[reportGeneralTypeIssues]
450
+ elements = partition (** partition_kwargs ) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
447
451
448
452
except OSError as e :
449
453
if isinstance (e .args [0 ], str ) and (
@@ -595,8 +599,13 @@ def _validate_chunking_strategy(chunking_strategy: Optional[str]) -> Optional[st
595
599
return chunking_strategy
596
600
597
601
598
- def _set_pdf_infer_table_structure (pdf_infer_table_structure : bool , strategy : str ) -> bool :
602
+ def _set_pdf_infer_table_structure (
603
+ pdf_infer_table_structure : bool , strategy : str , skip_infer_table_types : Optional [List [str ]]
604
+ ) -> bool :
599
605
"""Avoids table inference in "fast" and "ocr_only" runs."""
606
+ # NOTE(robinson) - line below is for type checking
607
+ skip_infer_table_types = [] if skip_infer_table_types is None else skip_infer_table_types
608
+ pdf_infer_table_structure = pdf_infer_table_structure and ("pdf" not in skip_infer_table_types )
600
609
return strategy in ("hi_res" , "auto" ) and pdf_infer_table_structure
601
610
602
611
@@ -704,7 +713,7 @@ def return_content_type(filename: str):
704
713
705
714
706
715
@router .get ("/general/v0/general" , include_in_schema = False )
707
- @router .get ("/general/v0.0.68 /general" , include_in_schema = False )
716
+ @router .get ("/general/v0.0.69 /general" , include_in_schema = False )
708
717
async def handle_invalid_get_request ():
709
718
raise HTTPException (
710
719
status_code = status .HTTP_405_METHOD_NOT_ALLOWED , detail = "Only POST requests are supported."
@@ -719,7 +728,7 @@ async def handle_invalid_get_request():
719
728
description = "Description" ,
720
729
operation_id = "partition_parameters" ,
721
730
)
722
- @router .post ("/general/v0.0.68 /general" , include_in_schema = False )
731
+ @router .post ("/general/v0.0.69 /general" , include_in_schema = False )
723
732
def general_partition (
724
733
request : Request ,
725
734
# cannot use annotated type here because of a bug described here:
0 commit comments