Skip to content

Commit 17f84c6

Browse files
committed
Add split_pdf_page_range parameter
1 parent f98193c commit 17f84c6

File tree

5 files changed

+31
-0
lines changed

5 files changed

+31
-0
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ res = s.general.partition(request=operations.PartitionRequest(
5555
content='0x2cC94b2FEF'.encode(),
5656
file_name='your_file_here',
5757
),
58+
split_pdf_page_range=[
59+
1,
60+
10,
61+
],
5862
strategy=shared.Strategy.AUTO,
5963
),
6064
))
@@ -110,6 +114,10 @@ res = s.general.partition(request=operations.PartitionRequest(
110114
content='0x2cC94b2FEF'.encode(),
111115
file_name='your_file_here',
112116
),
117+
split_pdf_page_range=[
118+
1,
119+
10,
120+
],
113121
strategy=shared.Strategy.AUTO,
114122
),
115123
),
@@ -139,6 +147,10 @@ res = s.general.partition(request=operations.PartitionRequest(
139147
content='0x2cC94b2FEF'.encode(),
140148
file_name='your_file_here',
141149
),
150+
split_pdf_page_range=[
151+
1,
152+
10,
153+
],
142154
strategy=shared.Strategy.AUTO,
143155
),
144156
))

USAGE.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ res = s.general.partition(request=operations.PartitionRequest(
1414
content='0x2cC94b2FEF'.encode(),
1515
file_name='your_file_here',
1616
),
17+
split_pdf_page_range=[
18+
1,
19+
10,
20+
],
1721
strategy=shared.Strategy.AUTO,
1822
),
1923
))

docs/models/shared/partitionparameters.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: [] | |
2929
| `split_pdf_concurrency_level` | *Optional[int]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. It's an internal parameter for the Python client and is not sent to the backend. | |
3030
| `split_pdf_page` | *Optional[bool]* | :heavy_minus_sign: | This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend. | |
31+
| `split_pdf_page_range` | List[*int*] | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter selects a subset of the PDF to send to the API. The parameter is a list of 2 integers within the range [1, length_of_pdf]. It's an internal parameter for the Python client and is not sent to the backend. | [<br/>1,<br/>10<br/>] |
3132
| `starting_page_number` | *Optional[int]* | :heavy_minus_sign: | When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27. | |
3233
| `strategy` | [Optional[shared.Strategy]](../../models/shared/strategy.md) | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | auto |
3334
| `unique_element_ids` | *Optional[bool]* | :heavy_minus_sign: | When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False` | |

overlay_client.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,18 @@ actions:
1212
"description": "This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend.",
1313
"default": true,
1414
}
15+
- target: $["components"]["schemas"]["partition_parameters"]["properties"]
16+
update:
17+
"split_pdf_page_range":
18+
{
19+
"type": "array",
20+
"title": "Split Pdf Page Range",
21+
"description": "When `split_pdf_page` is set to `True`, this parameter selects a subset of the PDF to send to the API. The parameter is a list of 2 integers within the range [1, length_of_pdf]. It's an internal parameter for the Python client and is not sent to the backend.",
22+
"items": {"type": "integer"},
23+
"minItems": 2,
24+
"maxItems": 2,
25+
"example": [1, 10],
26+
}
1527
- target: $["components"]["schemas"]["partition_parameters"]["properties"]
1628
update:
1729
"split_pdf_concurrency_level":

src/unstructured_client/models/shared/partition_parameters.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ class PartitionParameters:
8585
r"""When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. It's an internal parameter for the Python client and is not sent to the backend."""
8686
split_pdf_page: Optional[bool] = dataclasses.field(default=True, metadata={'multipart_form': { 'field_name': 'split_pdf_page' }})
8787
r"""This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend."""
88+
split_pdf_page_range: Optional[List[int]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'split_pdf_page_range' }})
89+
r"""When `split_pdf_page` is set to `True`, this parameter selects a subset of the PDF to send to the API. The parameter is a list of 2 integers within the range [1, length_of_pdf]. It's an internal parameter for the Python client and is not sent to the backend."""
8890
starting_page_number: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'starting_page_number' }})
8991
r"""When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27."""
9092
strategy: Optional[Strategy] = dataclasses.field(default=Strategy.AUTO, metadata={'multipart_form': { 'field_name': 'strategy' }})

0 commit comments

Comments
 (0)