Skip to content

Commit ab11a4d

Browse files
committed
Update some log messages, unit test assertions
1 parent 4a20be0 commit ab11a4d

File tree

4 files changed

+18
-15
lines changed

4 files changed

+18
-15
lines changed

_test_unstructured_client/integration/test_decorators.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -168,12 +168,10 @@ def test_integration_split_pdf_with_page_range(
168168
try:
169169
resp = client.general.partition(req)
170170
except ValueError as exc:
171-
if not expected_ok:
172-
assert "is out of bounds." in caplog.text
173-
assert "is out of bounds." in str(exc)
174-
return
175-
else:
176-
assert exc is None
171+
assert not expected_ok
172+
assert "is out of bounds." in caplog.text
173+
assert "is out of bounds." in str(exc)
174+
return
177175

178176
page_numbers = set([e["metadata"]["page_number"] for e in resp.elements])
179177

_test_unstructured_client/unit/test_split_pdf_hook.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -427,10 +427,8 @@ def test_unit_get_page_range_returns_valid_range(page_range, expected_result):
427427
max_pages=20,
428428
)
429429
except ValueError as exc:
430-
if not expected_result:
431-
assert "is out of bounds." in str(exc) or "is not a valid page range." in str(exc)
432-
return
433-
else:
434-
assert exc is None
430+
assert not expected_result
431+
assert "is out of bounds." in str(exc) or "is not a valid page range." in str(exc)
432+
return
435433

436434
assert result == expected_result

src/unstructured_client/_hooks/custom/form_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def get_page_range(form_data: FormData, key: str, max_pages: int) -> tuple[int,
4747
start, end = page_range
4848

4949
if not 0 < start <= max_pages or not 0 < end <= max_pages or not start <= end:
50-
msg = f"Page range {page_range} is out of bounds. Valid range is (1 - {max_pages})."
50+
msg = f"Page range {page_range} is out of bounds. Start and end values should be between 1 and {max_pages}."
5151
logger.error(msg)
5252
raise ValueError(msg)
5353

src/unstructured_client/_hooks/custom/split_pdf_hook.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ def before_request(
165165
max_pages=len(pdf.pages),
166166
)
167167

168-
page_count = min(len(pdf.pages), page_range_end - page_range_start + 1)
168+
page_count = page_range_end - page_range_start + 1
169169
logger.info(
170170
"Splitting pages %d to %d (%d total)",
171171
page_range_start,
@@ -189,11 +189,18 @@ def before_request(
189189

190190
pages = pdf_utils.get_pdf_pages(pdf, split_size=split_size, page_start=page_range_start, page_end=page_range_end)
191191
logger.info(
192-
"Partitioning %d, %d-paged sets.",
193-
math.ceil(page_count / split_size),
192+
"Partitioning %d files with %d page(s) each.",
193+
math.floor(page_count / split_size),
194194
split_size,
195195
)
196196

197+
# Log the remainder pages if there are any
198+
if page_count % split_size > 0:
199+
logger.info(
200+
"Partitioning 1 file with %d page(s).",
201+
page_count % split_size,
202+
)
203+
197204
async def call_api_partial(page):
198205
async with httpx.AsyncClient() as client:
199206
status_code, json_response = await request_utils.call_api_async(

0 commit comments

Comments
 (0)