Skip to content

Commit 48baa61

Browse files
authored
server : test script : add timeout for all requests (#9282)
1 parent f148516 commit 48baa61

File tree

2 files changed

+20 -20 lines changed

2 files changed

+20 -20 lines changed

examples/server/tests/features/parallel.feature

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@ Feature: Parallel
5252
Then all prompts are predicted with <n_predict> tokens
5353
Examples:
5454
| streaming | n_predict |
55-
| disabled | 200 |
56-
| enabled | 200 |
55+
| disabled | 128 |
56+
| enabled | 64 |
5757

5858
Scenario Outline: Multi users OAI completions compatibility no v1
5959
Given a system prompt You are a writer.

examples/server/tests/features/steps/steps.py

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323

2424
# pyright: reportRedeclaration=false
2525

26+
DEFAULT_TIMEOUT_SECONDS = aiohttp.ClientTimeout(total=600)
27+
2628
@step("a server listening on {server_fqdn}:{server_port}")
2729
def step_server_config(context, server_fqdn: str, server_port: str):
2830
context.server_fqdn = server_fqdn
@@ -689,7 +691,7 @@ def step_tokenize_set_add_special(context):
689691
@async_run_until_complete
690692
async def step_tokenize(context):
691693
context.tokenized_text = context_text(context)
692-
async with aiohttp.ClientSession() as session:
694+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
693695
tokenize_args = {
694696
"content": context.tokenized_text,
695697
}
@@ -706,7 +708,7 @@ async def step_tokenize(context):
706708
@async_run_until_complete
707709
async def step_detokenize(context):
708710
assert len(context.tokens) > 0
709-
async with aiohttp.ClientSession() as session:
711+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
710712
async with session.post(f'{context.base_url}/detokenize',
711713
json={
712714
"tokens": context.tokens,
@@ -735,7 +737,7 @@ def step_strings_for_tokenization(context):
735737
@step('an OPTIONS request is sent from {origin}')
736738
@async_run_until_complete
737739
async def step_options_request(context, origin):
738-
async with aiohttp.ClientSession() as session:
740+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
739741
headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin}
740742
async with session.options(f'{context.base_url}/v1/chat/completions',
741743
headers=headers) as response:
@@ -751,7 +753,7 @@ def step_check_options_header_value(context, cors_header, cors_header_value):
751753
@step('prometheus metrics are exposed')
752754
@async_run_until_complete
753755
async def step_prometheus_metrics_exported(context):
754-
async with aiohttp.ClientSession() as session:
756+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
755757
async with await session.get(f'{context.base_url}/metrics') as metrics_response:
756758
assert metrics_response.status == 200
757759
assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4"
@@ -824,7 +826,7 @@ async def concurrent_requests(context, f_completion, *args, **kwargs):
824826
@step('the slot {slot_id:d} is saved with filename "{filename}"')
825827
@async_run_until_complete
826828
async def step_save_slot(context, slot_id, filename):
827-
async with aiohttp.ClientSession() as session:
829+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
828830
async with session.post(f'{context.base_url}/slots/{slot_id}?action=save',
829831
json={"filename": filename},
830832
headers={"Content-Type": "application/json"}) as response:
@@ -834,7 +836,7 @@ async def step_save_slot(context, slot_id, filename):
834836
@step('the slot {slot_id:d} is restored with filename "{filename}"')
835837
@async_run_until_complete
836838
async def step_restore_slot(context, slot_id, filename):
837-
async with aiohttp.ClientSession() as session:
839+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
838840
async with session.post(f'{context.base_url}/slots/{slot_id}?action=restore',
839841
json={"filename": filename},
840842
headers={"Content-Type": "application/json"}) as response:
@@ -844,7 +846,7 @@ async def step_restore_slot(context, slot_id, filename):
844846
@step('the slot {slot_id:d} is erased')
845847
@async_run_until_complete
846848
async def step_erase_slot(context, slot_id):
847-
async with aiohttp.ClientSession() as session:
849+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
848850
async with session.post(f'{context.base_url}/slots/{slot_id}?action=erase',
849851
headers={"Content-Type": "application/json"}) as response:
850852
context.response = response
@@ -853,7 +855,7 @@ async def step_erase_slot(context, slot_id):
853855
@step('switch {on_or_off} lora adapter {lora_id:d}')
854856
@async_run_until_complete
855857
async def toggle_lora_adapter(context, on_or_off: str, lora_id: int):
856-
async with aiohttp.ClientSession() as session:
858+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
857859
async with session.post(f'{context.base_url}/lora-adapters',
858860
json=[{'id': lora_id, 'scale': 1 if on_or_off == 'on' else 0}],
859861
headers={"Content-Type": "application/json"}) as response:
@@ -889,7 +891,7 @@ async def request_completion(prompt,
889891
print(f"Set user_api_key: {user_api_key}")
890892
headers['Authorization'] = f'Bearer {user_api_key}'
891893

892-
async with aiohttp.ClientSession() as session:
894+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
893895
async with session.post(f'{base_url}/completion',
894896
json={
895897
"input_prefix": prompt_prefix,
@@ -902,8 +904,7 @@ async def request_completion(prompt,
902904
"temperature": temperature if temperature is not None else 0.8,
903905
"n_probs": 2,
904906
},
905-
headers=headers,
906-
timeout=3600) as response:
907+
headers=headers) as response:
907908
if expect_api_error is None or not expect_api_error:
908909
assert response.status == 200
909910
assert response.headers['Access-Control-Allow-Origin'] == origin
@@ -961,7 +962,7 @@ async def oai_chat_completions(user_prompt,
961962
if async_client:
962963
origin = 'llama.cpp'
963964
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
964-
async with aiohttp.ClientSession() as session:
965+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
965966
async with session.post(f'{base_url}{base_path}',
966967
json=payload,
967968
headers=headers) as response:
@@ -1048,7 +1049,7 @@ async def oai_chat_completions(user_prompt,
10481049

10491050

10501051
async def request_embedding(content, seed, base_url=None) -> list[list[float]]:
1051-
async with aiohttp.ClientSession() as session:
1052+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
10521053
async with session.post(f'{base_url}/embedding',
10531054
json={
10541055
"content": content,
@@ -1068,14 +1069,13 @@ async def request_oai_embeddings(input, seed,
10681069
headers=[]
10691070
if user_api_key is not None:
10701071
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
1071-
async with aiohttp.ClientSession() as session:
1072+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
10721073
async with session.post(f'{base_url}/v1/embeddings',
10731074
json={
10741075
"input": input,
10751076
"model": model,
10761077
},
1077-
headers=headers,
1078-
timeout=3600) as response:
1078+
headers=headers) as response:
10791079
assert response.status == 200, f"received status code not expected: {response.status}"
10801080
assert response.headers['Access-Control-Allow-Origin'] == origin
10811081
assert response.headers['Content-Type'] == "application/json; charset=utf-8"
@@ -1194,7 +1194,7 @@ async def wait_for_slots_status(context,
11941194
if 'GITHUB_ACTIONS' in os.environ:
11951195
timeout *= 2
11961196

1197-
async with aiohttp.ClientSession() as session:
1197+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
11981198
while True:
11991199
async with await session.get(f'{base_url}/slots', params=params) as slots_response:
12001200
status_code = slots_response.status
@@ -1237,7 +1237,7 @@ def assert_embeddings(embeddings):
12371237

12381238

12391239
async def request_slots_status(context, expected_slots):
1240-
async with aiohttp.ClientSession() as session:
1240+
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
12411241
async with await session.get(f'{context.base_url}/slots') as slots_response:
12421242
assert slots_response.status == 200
12431243
slots = await slots_response.json()

0 commit comments

Comments (0)